aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 10:26:53 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 10:26:53 -0400
commit19b4a8d520a6e0176dd52aaa429261ad4fcaa545 (patch)
tree6dcf5a780718fc50b9cd79cc803daa7c7e080a02
parent3cfef9524677a4ecb392d6fbffe6ebce6302f1d4 (diff)
parent048b718029033af117870d3da47da12995be14a3 (diff)
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (45 commits) rcu: Move propagation of ->completed from rcu_start_gp() to rcu_report_qs_rsp() rcu: Remove rcu_needs_cpu_flush() to avoid false quiescent states rcu: Wire up RCU_BOOST_PRIO for rcutree rcu: Make rcu_torture_boost() exit loops at end of test rcu: Make rcu_torture_fqs() exit loops at end of test rcu: Permit rt_mutex_unlock() with irqs disabled rcu: Avoid having just-onlined CPU resched itself when RCU is idle rcu: Suppress NMI backtraces when stall ends before dump rcu: Prohibit grace periods during early boot rcu: Simplify unboosting checks rcu: Prevent early boot set_need_resched() from __rcu_pending() rcu: Dump local stack if cannot dump all CPUs' stacks rcu: Move __rcu_read_unlock()'s barrier() within if-statement rcu: Improve rcu_assign_pointer() and RCU_INIT_POINTER() documentation rcu: Make rcu_assign_pointer() unconditionally insert a memory barrier rcu: Make rcu_implicit_dynticks_qs() locals be correct size rcu: Eliminate in_irq() checks in rcu_enter_nohz() nohz: Remove nohz_cpu_mask rcu: Document interpretation of RCU-lockdep splats rcu: Allow rcutorture's stat_interval parameter to be changed at runtime ...
-rw-r--r--Documentation/RCU/NMI-RCU.txt2
-rw-r--r--Documentation/RCU/lockdep-splat.txt110
-rw-r--r--Documentation/RCU/lockdep.txt34
-rw-r--r--Documentation/RCU/torture.txt137
-rw-r--r--Documentation/RCU/trace.txt38
-rw-r--r--include/linux/lockdep.h2
-rw-r--r--include/linux/rcupdate.h300
-rw-r--r--include/linux/rcutiny.h20
-rw-r--r--include/linux/rcutree.h2
-rw-r--r--include/linux/sched.h4
-rw-r--r--include/linux/types.h10
-rw-r--r--include/trace/events/rcu.h459
-rw-r--r--init/Kconfig6
-rw-r--r--kernel/lockdep.c84
-rw-r--r--kernel/pid.c4
-rw-r--r--kernel/rcu.h85
-rw-r--r--kernel/rcupdate.c26
-rw-r--r--kernel/rcutiny.c117
-rw-r--r--kernel/rcutiny_plugin.h134
-rw-r--r--kernel/rcutorture.c77
-rw-r--r--kernel/rcutree.c290
-rw-r--r--kernel/rcutree.h17
-rw-r--r--kernel/rcutree_plugin.h150
-rw-r--r--kernel/rcutree_trace.c13
-rw-r--r--kernel/rtmutex.c8
-rw-r--r--kernel/sched.c13
-rw-r--r--kernel/time/tick-sched.c6
27 files changed, 1489 insertions, 659 deletions
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index bf82851a0e57..687777f83b23 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -95,7 +95,7 @@ not to return until all ongoing NMI handlers exit. It is therefore safe
95to free up the handler's data as soon as synchronize_sched() returns. 95to free up the handler's data as soon as synchronize_sched() returns.
96 96
97Important note: for this to work, the architecture in question must 97Important note: for this to work, the architecture in question must
98invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. 98invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
99 99
100 100
101Answer to Quick Quiz 101Answer to Quick Quiz
diff --git a/Documentation/RCU/lockdep-splat.txt b/Documentation/RCU/lockdep-splat.txt
new file mode 100644
index 000000000000..bf9061142827
--- /dev/null
+++ b/Documentation/RCU/lockdep-splat.txt
@@ -0,0 +1,110 @@
1Lockdep-RCU was added to the Linux kernel in early 2010
2(http://lwn.net/Articles/371986/). This facility checks for some common
3misuses of the RCU API, most notably using one of the rcu_dereference()
4family to access an RCU-protected pointer without the proper protection.
5When such misuse is detected, a lockdep-RCU splat is emitted.
6
7The usual cause of a lockdep-RCU splat is someone accessing an
8RCU-protected data structure without either (1) being in the right kind of
9RCU read-side critical section or (2) holding the right update-side lock.
10This problem can therefore be serious: it might result in random memory
11overwriting or worse. There can of course be false positives, this
12being the real world and all that.
13
14So let's look at an example RCU lockdep splat from 3.0-rc5, one that
15has long since been fixed:
16
17===============================
18[ INFO: suspicious RCU usage. ]
19-------------------------------
20block/cfq-iosched.c:2776 suspicious rcu_dereference_protected() usage!
21
22other info that might help us debug this:
23
24
25rcu_scheduler_active = 1, debug_locks = 0
263 locks held by scsi_scan_6/1552:
27 #0: (&shost->scan_mutex){+.+.+.}, at: [<ffffffff8145efca>]
28scsi_scan_host_selected+0x5a/0x150
29 #1: (&eq->sysfs_lock){+.+...}, at: [<ffffffff812a5032>]
30elevator_exit+0x22/0x60
31 #2: (&(&q->__queue_lock)->rlock){-.-...}, at: [<ffffffff812b6233>]
32cfq_exit_queue+0x43/0x190
33
34stack backtrace:
35Pid: 1552, comm: scsi_scan_6 Not tainted 3.0.0-rc5 #17
36Call Trace:
37 [<ffffffff810abb9b>] lockdep_rcu_dereference+0xbb/0xc0
38 [<ffffffff812b6139>] __cfq_exit_single_io_context+0xe9/0x120
39 [<ffffffff812b626c>] cfq_exit_queue+0x7c/0x190
40 [<ffffffff812a5046>] elevator_exit+0x36/0x60
41 [<ffffffff812a802a>] blk_cleanup_queue+0x4a/0x60
42 [<ffffffff8145cc09>] scsi_free_queue+0x9/0x10
43 [<ffffffff81460944>] __scsi_remove_device+0x84/0xd0
44 [<ffffffff8145dca3>] scsi_probe_and_add_lun+0x353/0xb10
45 [<ffffffff817da069>] ? error_exit+0x29/0xb0
46 [<ffffffff817d98ed>] ? _raw_spin_unlock_irqrestore+0x3d/0x80
47 [<ffffffff8145e722>] __scsi_scan_target+0x112/0x680
48 [<ffffffff812c690d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
49 [<ffffffff817da069>] ? error_exit+0x29/0xb0
50 [<ffffffff812bcc60>] ? kobject_del+0x40/0x40
51 [<ffffffff8145ed16>] scsi_scan_channel+0x86/0xb0
52 [<ffffffff8145f0b0>] scsi_scan_host_selected+0x140/0x150
53 [<ffffffff8145f149>] do_scsi_scan_host+0x89/0x90
54 [<ffffffff8145f170>] do_scan_async+0x20/0x160
55 [<ffffffff8145f150>] ? do_scsi_scan_host+0x90/0x90
56 [<ffffffff810975b6>] kthread+0xa6/0xb0
57 [<ffffffff817db154>] kernel_thread_helper+0x4/0x10
58 [<ffffffff81066430>] ? finish_task_switch+0x80/0x110
59 [<ffffffff817d9c04>] ? retint_restore_args+0xe/0xe
60 [<ffffffff81097510>] ? __init_kthread_worker+0x70/0x70
61 [<ffffffff817db150>] ? gs_change+0xb/0xb
62
63Line 2776 of block/cfq-iosched.c in v3.0-rc5 is as follows:
64
65 if (rcu_dereference(ioc->ioc_data) == cic) {
66
67This form says that it must be in a plain vanilla RCU read-side critical
68section, but the "other info" list above shows that this is not the
69case. Instead, we hold three locks, one of which might be RCU related.
70And maybe that lock really does protect this reference. If so, the fix
71is to inform RCU, perhaps by changing __cfq_exit_single_io_context() to
72take the struct request_queue "q" from cfq_exit_queue() as an argument,
73which would permit us to invoke rcu_dereference_protected as follows:
74
75 if (rcu_dereference_protected(ioc->ioc_data,
76 lockdep_is_held(&q->queue_lock)) == cic) {
77
78With this change, there would be no lockdep-RCU splat emitted if this
79code was invoked either from within an RCU read-side critical section
80or with the ->queue_lock held. In particular, this would have suppressed
81the above lockdep-RCU splat because ->queue_lock is held (see #2 in the
82list above).
83
84On the other hand, perhaps we really do need an RCU read-side critical
85section. In this case, the critical section must span the use of the
86return value from rcu_dereference(), or at least until there is some
87reference count incremented or some such. One way to handle this is to
88add rcu_read_lock() and rcu_read_unlock() as follows:
89
90 rcu_read_lock();
91 if (rcu_dereference(ioc->ioc_data) == cic) {
92 spin_lock(&ioc->lock);
93 rcu_assign_pointer(ioc->ioc_data, NULL);
94 spin_unlock(&ioc->lock);
95 }
96 rcu_read_unlock();
97
98With this change, the rcu_dereference() is always within an RCU
99read-side critical section, which again would have suppressed the
100above lockdep-RCU splat.
101
102But in this particular case, we don't actually dereference the pointer
103returned from rcu_dereference(). Instead, that pointer is just compared
104to the cic pointer, which means that the rcu_dereference() can be replaced
105by rcu_access_pointer() as follows:
106
107 if (rcu_access_pointer(ioc->ioc_data) == cic) {
108
109Because it is legal to invoke rcu_access_pointer() without protection,
110this change would also suppress the above lockdep-RCU splat.
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt
index d7a49b2f6994..a102d4b3724b 100644
--- a/Documentation/RCU/lockdep.txt
+++ b/Documentation/RCU/lockdep.txt
@@ -32,9 +32,27 @@ checking of rcu_dereference() primitives:
32 srcu_dereference(p, sp): 32 srcu_dereference(p, sp):
33 Check for SRCU read-side critical section. 33 Check for SRCU read-side critical section.
34 rcu_dereference_check(p, c): 34 rcu_dereference_check(p, c):
35 Use explicit check expression "c". This is useful in 35 Use explicit check expression "c" along with
36 code that is invoked by both readers and updaters. 36 rcu_read_lock_held(). This is useful in code that is
37 rcu_dereference_raw(p) 37 invoked by both RCU readers and updaters.
38 rcu_dereference_bh_check(p, c):
39 Use explicit check expression "c" along with
40 rcu_read_lock_bh_held(). This is useful in code that
41 is invoked by both RCU-bh readers and updaters.
42 rcu_dereference_sched_check(p, c):
43 Use explicit check expression "c" along with
44 rcu_read_lock_sched_held(). This is useful in code that
45 is invoked by both RCU-sched readers and updaters.
46 srcu_dereference_check(p, c):
47 Use explicit check expression "c" along with
48 srcu_read_lock_held(). This is useful in code that
49 is invoked by both SRCU readers and updaters.
50 rcu_dereference_index_check(p, c):
51 Use explicit check expression "c", but the caller
52 must supply one of the rcu_read_lock_held() functions.
53 This is useful in code that uses RCU-protected arrays
54 that is invoked by both RCU readers and updaters.
55 rcu_dereference_raw(p):
38 Don't check. (Use sparingly, if at all.) 56 Don't check. (Use sparingly, if at all.)
39 rcu_dereference_protected(p, c): 57 rcu_dereference_protected(p, c):
40 Use explicit check expression "c", and omit all barriers 58 Use explicit check expression "c", and omit all barriers
@@ -48,13 +66,11 @@ checking of rcu_dereference() primitives:
48 value of the pointer itself, for example, against NULL. 66 value of the pointer itself, for example, against NULL.
49 67
50The rcu_dereference_check() check expression can be any boolean 68The rcu_dereference_check() check expression can be any boolean
51expression, but would normally include one of the rcu_read_lock_held() 69expression, but would normally include a lockdep expression. However,
52family of functions and a lockdep expression. However, any boolean 70any boolean expression can be used. For a moderately ornate example,
53expression can be used. For a moderately ornate example, consider 71consider the following:
54the following:
55 72
56 file = rcu_dereference_check(fdt->fd[fd], 73 file = rcu_dereference_check(fdt->fd[fd],
57 rcu_read_lock_held() ||
58 lockdep_is_held(&files->file_lock) || 74 lockdep_is_held(&files->file_lock) ||
59 atomic_read(&files->count) == 1); 75 atomic_read(&files->count) == 1);
60 76
@@ -62,7 +78,7 @@ This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner,
62and, if CONFIG_PROVE_RCU is configured, verifies that this expression 78and, if CONFIG_PROVE_RCU is configured, verifies that this expression
63is used in: 79is used in:
64 80
651. An RCU read-side critical section, or 811. An RCU read-side critical section (implicit), or
662. with files->file_lock held, or 822. with files->file_lock held, or
673. on an unshared files_struct. 833. on an unshared files_struct.
68 84
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 5d9016795fd8..783d6c134d3f 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -42,7 +42,7 @@ fqs_holdoff Holdoff time (in microseconds) between consecutive calls
42fqs_stutter Wait time (in seconds) between consecutive bursts 42fqs_stutter Wait time (in seconds) between consecutive bursts
43 of calls to force_quiescent_state(). 43 of calls to force_quiescent_state().
44 44
45irqreaders Says to invoke RCU readers from irq level. This is currently 45irqreader Says to invoke RCU readers from irq level. This is currently
46 done via timers. Defaults to "1" for variants of RCU that 46 done via timers. Defaults to "1" for variants of RCU that
47 permit this. (Or, more accurately, variants of RCU that do 47 permit this. (Or, more accurately, variants of RCU that do
48 -not- permit this know to ignore this variable.) 48 -not- permit this know to ignore this variable.)
@@ -79,19 +79,68 @@ stutter The length of time to run the test before pausing for this
79 Specifying "stutter=0" causes the test to run continuously 79 Specifying "stutter=0" causes the test to run continuously
80 without pausing, which is the old default behavior. 80 without pausing, which is the old default behavior.
81 81
82test_boost Whether or not to test the ability of RCU to do priority
83 boosting. Defaults to "test_boost=1", which performs
84 RCU priority-inversion testing only if the selected
85 RCU implementation supports priority boosting. Specifying
86 "test_boost=0" never performs RCU priority-inversion
87 testing. Specifying "test_boost=2" performs RCU
88 priority-inversion testing even if the selected RCU
89 implementation does not support RCU priority boosting,
90 which can be used to test rcutorture's ability to
91 carry out RCU priority-inversion testing.
92
93test_boost_interval
94 The number of seconds in an RCU priority-inversion test
95 cycle. Defaults to "test_boost_interval=7". It is
96 usually wise for this value to be relatively prime to
97 the value selected for "stutter".
98
99test_boost_duration
100 The number of seconds to do RCU priority-inversion testing
101 within any given "test_boost_interval". Defaults to
102 "test_boost_duration=4".
103
82test_no_idle_hz Whether or not to test the ability of RCU to operate in 104test_no_idle_hz Whether or not to test the ability of RCU to operate in
83 a kernel that disables the scheduling-clock interrupt to 105 a kernel that disables the scheduling-clock interrupt to
84 idle CPUs. Boolean parameter, "1" to test, "0" otherwise. 106 idle CPUs. Boolean parameter, "1" to test, "0" otherwise.
85 Defaults to omitting this test. 107 Defaults to omitting this test.
86 108
87torture_type The type of RCU to test: "rcu" for the rcu_read_lock() API, 109torture_type The type of RCU to test, with string values as follows:
88 "rcu_sync" for rcu_read_lock() with synchronous reclamation, 110
89 "rcu_bh" for the rcu_read_lock_bh() API, "rcu_bh_sync" for 111 "rcu": rcu_read_lock(), rcu_read_unlock() and call_rcu().
90 rcu_read_lock_bh() with synchronous reclamation, "srcu" for 112
91 the "srcu_read_lock()" API, "sched" for the use of 113 "rcu_sync": rcu_read_lock(), rcu_read_unlock(), and
92 preempt_disable() together with synchronize_sched(), 114 synchronize_rcu().
93 and "sched_expedited" for the use of preempt_disable() 115
94 with synchronize_sched_expedited(). 116 "rcu_expedited": rcu_read_lock(), rcu_read_unlock(), and
117 synchronize_rcu_expedited().
118
119 "rcu_bh": rcu_read_lock_bh(), rcu_read_unlock_bh(), and
120 call_rcu_bh().
121
122 "rcu_bh_sync": rcu_read_lock_bh(), rcu_read_unlock_bh(),
123 and synchronize_rcu_bh().
124
125 "rcu_bh_expedited": rcu_read_lock_bh(), rcu_read_unlock_bh(),
126 and synchronize_rcu_bh_expedited().
127
128 "srcu": srcu_read_lock(), srcu_read_unlock() and
129 synchronize_srcu().
130
131 "srcu_expedited": srcu_read_lock(), srcu_read_unlock() and
132 synchronize_srcu_expedited().
133
134 "sched": preempt_disable(), preempt_enable(), and
135 call_rcu_sched().
136
137 "sched_sync": preempt_disable(), preempt_enable(), and
138 synchronize_sched().
139
140 "sched_expedited": preempt_disable(), preempt_enable(), and
141 synchronize_sched_expedited().
142
143 Defaults to "rcu".
95 144
96verbose Enable debug printk()s. Default is disabled. 145verbose Enable debug printk()s. Default is disabled.
97 146
@@ -100,12 +149,12 @@ OUTPUT
100 149
101The statistics output is as follows: 150The statistics output is as follows:
102 151
103 rcu-torture: --- Start of test: nreaders=16 stat_interval=0 verbose=0 152 rcu-torture:--- Start of test: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
104 rcu-torture: rtc: 0000000000000000 ver: 1916 tfle: 0 rta: 1916 rtaf: 0 rtf: 1915 153 rcu-torture: rtc: (null) ver: 155441 tfle: 0 rta: 155441 rtaf: 8884 rtf: 155440 rtmbe: 0 rtbke: 0 rtbre: 0 rtbf: 0 rtb: 0 nt: 3055767
105 rcu-torture: Reader Pipe: 1466408 9747 0 0 0 0 0 0 0 0 0 154 rcu-torture: Reader Pipe: 727860534 34213 0 0 0 0 0 0 0 0 0
106 rcu-torture: Reader Batch: 1464477 11678 0 0 0 0 0 0 0 0 155 rcu-torture: Reader Batch: 727877838 17003 0 0 0 0 0 0 0 0 0
107 rcu-torture: Free-Block Circulation: 1915 1915 1915 1915 1915 1915 1915 1915 1915 1915 0 156 rcu-torture: Free-Block Circulation: 155440 155440 155440 155440 155440 155440 155440 155440 155440 155440 0
108 rcu-torture: --- End of test 157 rcu-torture:--- End of test: SUCCESS: nreaders=16 nfakewriters=4 stat_interval=30 verbose=0 test_no_idle_hz=1 shuffle_interval=3 stutter=5 irqreader=1 fqs_duration=0 fqs_holdoff=0 fqs_stutter=3 test_boost=1/0 test_boost_interval=7 test_boost_duration=4
109 158
110The command "dmesg | grep torture:" will extract this information on 159The command "dmesg | grep torture:" will extract this information on
111most systems. On more esoteric configurations, it may be necessary to 160most systems. On more esoteric configurations, it may be necessary to
@@ -113,26 +162,55 @@ use other commands to access the output of the printk()s used by
113the RCU torture test. The printk()s use KERN_ALERT, so they should 162the RCU torture test. The printk()s use KERN_ALERT, so they should
114be evident. ;-) 163be evident. ;-)
115 164
165The first and last lines show the rcutorture module parameters, and the
166last line shows either "SUCCESS" or "FAILURE", based on rcutorture's
167automatic determination as to whether RCU operated correctly.
168
116The entries are as follows: 169The entries are as follows:
117 170
118o "rtc": The hexadecimal address of the structure currently visible 171o "rtc": The hexadecimal address of the structure currently visible
119 to readers. 172 to readers.
120 173
121o "ver": The number of times since boot that the rcutw writer task 174o "ver": The number of times since boot that the RCU writer task
122 has changed the structure visible to readers. 175 has changed the structure visible to readers.
123 176
124o "tfle": If non-zero, indicates that the "torture freelist" 177o "tfle": If non-zero, indicates that the "torture freelist"
125 containing structure to be placed into the "rtc" area is empty. 178 containing structures to be placed into the "rtc" area is empty.
126 This condition is important, since it can fool you into thinking 179 This condition is important, since it can fool you into thinking
127 that RCU is working when it is not. :-/ 180 that RCU is working when it is not. :-/
128 181
129o "rta": Number of structures allocated from the torture freelist. 182o "rta": Number of structures allocated from the torture freelist.
130 183
131o "rtaf": Number of allocations from the torture freelist that have 184o "rtaf": Number of allocations from the torture freelist that have
132 failed due to the list being empty. 185 failed due to the list being empty. It is not unusual for this
186 to be non-zero, but it is bad for it to be a large fraction of
187 the value indicated by "rta".
133 188
134o "rtf": Number of frees into the torture freelist. 189o "rtf": Number of frees into the torture freelist.
135 190
191o "rtmbe": A non-zero value indicates that rcutorture believes that
192 rcu_assign_pointer() and rcu_dereference() are not working
193 correctly. This value should be zero.
194
195o "rtbke": rcutorture was unable to create the real-time kthreads
196 used to force RCU priority inversion. This value should be zero.
197
198o "rtbre": Although rcutorture successfully created the kthreads
199 used to force RCU priority inversion, it was unable to set them
200 to the real-time priority level of 1. This value should be zero.
201
202o "rtbf": The number of times that RCU priority boosting failed
203 to resolve RCU priority inversion.
204
205o "rtb": The number of times that rcutorture attempted to force
206 an RCU priority inversion condition. If you are testing RCU
207 priority boosting via the "test_boost" module parameter, this
208 value should be non-zero.
209
210o "nt": The number of times rcutorture ran RCU read-side code from
211 within a timer handler. This value should be non-zero only
212 if you specified the "irqreader" module parameter.
213
136o "Reader Pipe": Histogram of "ages" of structures seen by readers. 214o "Reader Pipe": Histogram of "ages" of structures seen by readers.
137 If any entries past the first two are non-zero, RCU is broken. 215 If any entries past the first two are non-zero, RCU is broken.
138 And rcutorture prints the error flag string "!!!" to make sure 216 And rcutorture prints the error flag string "!!!" to make sure
@@ -162,26 +240,15 @@ o "Free-Block Circulation": Shows the number of torture structures
162 somehow gets incremented farther than it should. 240 somehow gets incremented farther than it should.
163 241
164Different implementations of RCU can provide implementation-specific 242Different implementations of RCU can provide implementation-specific
165additional information. For example, SRCU provides the following: 243additional information. For example, SRCU provides the following
244additional line:
166 245
167 srcu-torture: rtc: f8cf46a8 ver: 355 tfle: 0 rta: 356 rtaf: 0 rtf: 346 rtmbe: 0
168 srcu-torture: Reader Pipe: 559738 939 0 0 0 0 0 0 0 0 0
169 srcu-torture: Reader Batch: 560434 243 0 0 0 0 0 0 0 0
170 srcu-torture: Free-Block Circulation: 355 354 353 352 351 350 349 348 347 346 0
171 srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1) 246 srcu-torture: per-CPU(idx=1): 0(0,1) 1(0,1) 2(0,0) 3(0,1)
172 247
173The first four lines are similar to those for RCU. The last line shows 248This line shows the per-CPU counter state. The numbers in parentheses are
174the per-CPU counter state. The numbers in parentheses are the values 249the values of the "old" and "current" counters for the corresponding CPU.
175of the "old" and "current" counters for the corresponding CPU. The 250The "idx" value maps the "old" and "current" values to the underlying
176"idx" value maps the "old" and "current" values to the underlying array, 251array, and is useful for debugging.
177and is useful for debugging.
178
179Similarly, sched_expedited RCU provides the following:
180
181 sched_expedited-torture: rtc: d0000000016c1880 ver: 1090796 tfle: 0 rta: 1090796 rtaf: 0 rtf: 1090787 rtmbe: 0 nt: 27713319
182 sched_expedited-torture: Reader Pipe: 12660320201 95875 0 0 0 0 0 0 0 0 0
183 sched_expedited-torture: Reader Batch: 12660424885 0 0 0 0 0 0 0 0 0 0
184 sched_expedited-torture: Free-Block Circulation: 1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
185 252
186 253
187USAGE 254USAGE
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt
index 8173cec473aa..aaf65f6c6cd7 100644
--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt
@@ -33,23 +33,23 @@ rcu/rcuboost:
33The output of "cat rcu/rcudata" looks as follows: 33The output of "cat rcu/rcudata" looks as follows:
34 34
35rcu_sched: 35rcu_sched:
36 0 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0 36 0 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=545/1/0 df=50 of=0 ri=0 ql=163 qs=NRW. kt=0/W/0 ktl=ebc3 b=10 ci=153737 co=0 ca=0
37 1 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0 37 1 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=967/1/0 df=58 of=0 ri=0 ql=634 qs=NRW. kt=0/W/1 ktl=58c b=10 ci=191037 co=0 ca=0
38 2 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0 38 2 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1081/1/0 df=175 of=0 ri=0 ql=74 qs=N.W. kt=0/W/2 ktl=da94 b=10 ci=75991 co=0 ca=0
39 3 c=20942 g=20943 pq=1 pqc=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0 39 3 c=20942 g=20943 pq=1 pgp=20942 qp=1 dt=1846/0/0 df=404 of=0 ri=0 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=72261 co=0 ca=0
40 4 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0 40 4 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=369/1/0 df=83 of=0 ri=0 ql=48 qs=N.W. kt=0/W/4 ktl=e0e7 b=10 ci=128365 co=0 ca=0
41 5 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0 41 5 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=381/1/0 df=64 of=0 ri=0 ql=169 qs=NRW. kt=0/W/5 ktl=fb2f b=10 ci=164360 co=0 ca=0
42 6 c=20972 g=20973 pq=1 pqc=20972 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0 42 6 c=20972 g=20973 pq=1 pgp=20973 qp=0 dt=1037/1/0 df=183 of=0 ri=0 ql=62 qs=N.W. kt=0/W/6 ktl=d2ad b=10 ci=65663 co=0 ca=0
43 7 c=20897 g=20897 pq=1 pqc=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0 43 7 c=20897 g=20897 pq=1 pgp=20896 qp=0 dt=1572/0/0 df=382 of=0 ri=0 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=75006 co=0 ca=0
44rcu_bh: 44rcu_bh:
45 0 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0 45 0 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=545/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/0 ktl=ebc3 b=10 ci=0 co=0 ca=0
46 1 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0 46 1 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=967/1/0 df=3 of=0 ri=1 ql=0 qs=.... kt=0/W/1 ktl=58c b=10 ci=151 co=0 ca=0
47 2 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0 47 2 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1081/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/2 ktl=da94 b=10 ci=0 co=0 ca=0
48 3 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0 48 3 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1846/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/3 ktl=d1cd b=10 ci=0 co=0 ca=0
49 4 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0 49 4 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=369/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/4 ktl=e0e7 b=10 ci=0 co=0 ca=0
50 5 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0 50 5 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=381/1/0 df=4 of=0 ri=1 ql=0 qs=.... kt=0/W/5 ktl=fb2f b=10 ci=0 co=0 ca=0
51 6 c=1480 g=1480 pq=1 pqc=1479 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0 51 6 c=1480 g=1480 pq=1 pgp=1480 qp=0 dt=1037/1/0 df=6 of=0 ri=1 ql=0 qs=.... kt=0/W/6 ktl=d2ad b=10 ci=0 co=0 ca=0
52 7 c=1474 g=1474 pq=1 pqc=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0 52 7 c=1474 g=1474 pq=1 pgp=1473 qp=0 dt=1572/0/0 df=8 of=0 ri=1 ql=0 qs=.... kt=0/W/7 ktl=cf15 b=10 ci=0 co=0 ca=0
53 53
54The first section lists the rcu_data structures for rcu_sched, the second 54The first section lists the rcu_data structures for rcu_sched, the second
55for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an 55for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an
@@ -84,7 +84,7 @@ o "pq" indicates that this CPU has passed through a quiescent state
84 CPU has not yet reported that fact, (2) some other CPU has not 84 CPU has not yet reported that fact, (2) some other CPU has not
85 yet reported for this grace period, or (3) both. 85 yet reported for this grace period, or (3) both.
86 86
87o "pqc" indicates which grace period the last-observed quiescent 87o "pgp" indicates which grace period the last-observed quiescent
88 state for this CPU corresponds to. This is important for handling 88 state for this CPU corresponds to. This is important for handling
89 the race between CPU 0 reporting an extended dynticks-idle 89 the race between CPU 0 reporting an extended dynticks-idle
90 quiescent state for CPU 1 and CPU 1 suddenly waking up and 90 quiescent state for CPU 1 and CPU 1 suddenly waking up and
@@ -184,10 +184,14 @@ o "kt" is the per-CPU kernel-thread state. The digit preceding
184 The number after the final slash is the CPU that the kthread 184 The number after the final slash is the CPU that the kthread
185 is actually running on. 185 is actually running on.
186 186
187 This field is displayed only for CONFIG_RCU_BOOST kernels.
188
187o "ktl" is the low-order 16 bits (in hexadecimal) of the count of 189o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
188 the number of times that this CPU's per-CPU kthread has gone 190 the number of times that this CPU's per-CPU kthread has gone
189 through its loop servicing invoke_rcu_cpu_kthread() requests. 191 through its loop servicing invoke_rcu_cpu_kthread() requests.
190 192
193 This field is displayed only for CONFIG_RCU_BOOST kernels.
194
191o "b" is the batch limit for this CPU. If more than this number 195o "b" is the batch limit for this CPU. If more than this number
192 of RCU callbacks is ready to invoke, then the remainder will 196 of RCU callbacks is ready to invoke, then the remainder will
193 be deferred. 197 be deferred.
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index ef820a3c378b..b6a56e37284c 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -548,7 +548,7 @@ do { \
548#endif 548#endif
549 549
550#ifdef CONFIG_PROVE_RCU 550#ifdef CONFIG_PROVE_RCU
551extern void lockdep_rcu_dereference(const char *file, const int line); 551void lockdep_rcu_suspicious(const char *file, const int line, const char *s);
552#endif 552#endif
553 553
554#endif /* __LINUX_LOCKDEP_H */ 554#endif /* __LINUX_LOCKDEP_H */
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 8f4f881a0ad8..2cf4226ade7e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -33,6 +33,7 @@
33#ifndef __LINUX_RCUPDATE_H 33#ifndef __LINUX_RCUPDATE_H
34#define __LINUX_RCUPDATE_H 34#define __LINUX_RCUPDATE_H
35 35
36#include <linux/types.h>
36#include <linux/cache.h> 37#include <linux/cache.h>
37#include <linux/spinlock.h> 38#include <linux/spinlock.h>
38#include <linux/threads.h> 39#include <linux/threads.h>
@@ -64,32 +65,74 @@ static inline void rcutorture_record_progress(unsigned long vernum)
64#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) 65#define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b))
65#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) 66#define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b))
66 67
68/* Exported common interfaces */
69
70#ifdef CONFIG_PREEMPT_RCU
71
67/** 72/**
68 * struct rcu_head - callback structure for use with RCU 73 * call_rcu() - Queue an RCU callback for invocation after a grace period.
69 * @next: next update requests in a list 74 * @head: structure to be used for queueing the RCU updates.
70 * @func: actual update function to call after the grace period. 75 * @func: actual callback function to be invoked after the grace period
76 *
77 * The callback function will be invoked some time after a full grace
78 * period elapses, in other words after all pre-existing RCU read-side
79 * critical sections have completed. However, the callback function
80 * might well execute concurrently with RCU read-side critical sections
81 * that started after call_rcu() was invoked. RCU read-side critical
82 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
83 * and may be nested.
71 */ 84 */
72struct rcu_head { 85extern void call_rcu(struct rcu_head *head,
73 struct rcu_head *next; 86 void (*func)(struct rcu_head *head));
74 void (*func)(struct rcu_head *head);
75};
76 87
77/* Exported common interfaces */ 88#else /* #ifdef CONFIG_PREEMPT_RCU */
89
90/* In classic RCU, call_rcu() is just call_rcu_sched(). */
91#define call_rcu call_rcu_sched
92
93#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
94
95/**
96 * call_rcu_bh() - Queue an RCU callback for invocation after a quicker grace period.
97 * @head: structure to be used for queueing the RCU updates.
98 * @func: actual callback function to be invoked after the grace period
99 *
100 * The callback function will be invoked some time after a full grace
101 * period elapses, in other words after all currently executing RCU
102 * read-side critical sections have completed. call_rcu_bh() assumes
103 * that the read-side critical sections end on completion of a softirq
104 * handler. This means that read-side critical sections in process
105 * context must not be interrupted by softirqs. This interface is to be
106 * used when most of the read-side critical sections are in softirq context.
107 * RCU read-side critical sections are delimited by :
108 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
109 * OR
110 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
111 * These may be nested.
112 */
113extern void call_rcu_bh(struct rcu_head *head,
114 void (*func)(struct rcu_head *head));
115
116/**
117 * call_rcu_sched() - Queue an RCU callback for invocation after a sched grace period.
118 * @head: structure to be used for queueing the RCU updates.
119 * @func: actual callback function to be invoked after the grace period
120 *
121 * The callback function will be invoked some time after a full grace
122 * period elapses, in other words after all currently executing RCU
123 * read-side critical sections have completed. call_rcu_sched() assumes
124 * that the read-side critical sections end on enabling of preemption
125 * or on voluntary preemption.
126 * RCU read-side critical sections are delimited by :
127 * - rcu_read_lock_sched() and rcu_read_unlock_sched(),
128 * OR
129 * anything that disables preemption.
130 * These may be nested.
131 */
78extern void call_rcu_sched(struct rcu_head *head, 132extern void call_rcu_sched(struct rcu_head *head,
79 void (*func)(struct rcu_head *rcu)); 133 void (*func)(struct rcu_head *rcu));
80extern void synchronize_sched(void);
81extern void rcu_barrier_bh(void);
82extern void rcu_barrier_sched(void);
83
84static inline void __rcu_read_lock_bh(void)
85{
86 local_bh_disable();
87}
88 134
89static inline void __rcu_read_unlock_bh(void) 135extern void synchronize_sched(void);
90{
91 local_bh_enable();
92}
93 136
94#ifdef CONFIG_PREEMPT_RCU 137#ifdef CONFIG_PREEMPT_RCU
95 138
@@ -152,6 +195,15 @@ static inline void rcu_exit_nohz(void)
152 195
153#endif /* #else #ifdef CONFIG_NO_HZ */ 196#endif /* #else #ifdef CONFIG_NO_HZ */
154 197
198/*
199 * Infrastructure to implement the synchronize_() primitives in
200 * TREE_RCU and rcu_barrier_() primitives in TINY_RCU.
201 */
202
203typedef void call_rcu_func_t(struct rcu_head *head,
204 void (*func)(struct rcu_head *head));
205void wait_rcu_gp(call_rcu_func_t crf);
206
155#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) 207#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
156#include <linux/rcutree.h> 208#include <linux/rcutree.h>
157#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU) 209#elif defined(CONFIG_TINY_RCU) || defined(CONFIG_TINY_PREEMPT_RCU)
@@ -297,19 +349,31 @@ extern int rcu_my_thread_group_empty(void);
297/** 349/**
298 * rcu_lockdep_assert - emit lockdep splat if specified condition not met 350 * rcu_lockdep_assert - emit lockdep splat if specified condition not met
299 * @c: condition to check 351 * @c: condition to check
352 * @s: informative message
300 */ 353 */
301#define rcu_lockdep_assert(c) \ 354#define rcu_lockdep_assert(c, s) \
302 do { \ 355 do { \
303 static bool __warned; \ 356 static bool __warned; \
304 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ 357 if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \
305 __warned = true; \ 358 __warned = true; \
306 lockdep_rcu_dereference(__FILE__, __LINE__); \ 359 lockdep_rcu_suspicious(__FILE__, __LINE__, s); \
307 } \ 360 } \
308 } while (0) 361 } while (0)
309 362
363#define rcu_sleep_check() \
364 do { \
365 rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map), \
366 "Illegal context switch in RCU-bh" \
367 " read-side critical section"); \
368 rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map), \
369 "Illegal context switch in RCU-sched"\
370 " read-side critical section"); \
371 } while (0)
372
310#else /* #ifdef CONFIG_PROVE_RCU */ 373#else /* #ifdef CONFIG_PROVE_RCU */
311 374
312#define rcu_lockdep_assert(c) do { } while (0) 375#define rcu_lockdep_assert(c, s) do { } while (0)
376#define rcu_sleep_check() do { } while (0)
313 377
314#endif /* #else #ifdef CONFIG_PROVE_RCU */ 378#endif /* #else #ifdef CONFIG_PROVE_RCU */
315 379
@@ -338,14 +402,16 @@ extern int rcu_my_thread_group_empty(void);
338#define __rcu_dereference_check(p, c, space) \ 402#define __rcu_dereference_check(p, c, space) \
339 ({ \ 403 ({ \
340 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \ 404 typeof(*p) *_________p1 = (typeof(*p)*__force )ACCESS_ONCE(p); \
341 rcu_lockdep_assert(c); \ 405 rcu_lockdep_assert(c, "suspicious rcu_dereference_check()" \
406 " usage"); \
342 rcu_dereference_sparse(p, space); \ 407 rcu_dereference_sparse(p, space); \
343 smp_read_barrier_depends(); \ 408 smp_read_barrier_depends(); \
344 ((typeof(*p) __force __kernel *)(_________p1)); \ 409 ((typeof(*p) __force __kernel *)(_________p1)); \
345 }) 410 })
346#define __rcu_dereference_protected(p, c, space) \ 411#define __rcu_dereference_protected(p, c, space) \
347 ({ \ 412 ({ \
348 rcu_lockdep_assert(c); \ 413 rcu_lockdep_assert(c, "suspicious rcu_dereference_protected()" \
414 " usage"); \
349 rcu_dereference_sparse(p, space); \ 415 rcu_dereference_sparse(p, space); \
350 ((typeof(*p) __force __kernel *)(p)); \ 416 ((typeof(*p) __force __kernel *)(p)); \
351 }) 417 })
@@ -359,15 +425,15 @@ extern int rcu_my_thread_group_empty(void);
359#define __rcu_dereference_index_check(p, c) \ 425#define __rcu_dereference_index_check(p, c) \
360 ({ \ 426 ({ \
361 typeof(p) _________p1 = ACCESS_ONCE(p); \ 427 typeof(p) _________p1 = ACCESS_ONCE(p); \
362 rcu_lockdep_assert(c); \ 428 rcu_lockdep_assert(c, \
429 "suspicious rcu_dereference_index_check()" \
430 " usage"); \
363 smp_read_barrier_depends(); \ 431 smp_read_barrier_depends(); \
364 (_________p1); \ 432 (_________p1); \
365 }) 433 })
366#define __rcu_assign_pointer(p, v, space) \ 434#define __rcu_assign_pointer(p, v, space) \
367 ({ \ 435 ({ \
368 if (!__builtin_constant_p(v) || \ 436 smp_wmb(); \
369 ((v) != NULL)) \
370 smp_wmb(); \
371 (p) = (typeof(*v) __force space *)(v); \ 437 (p) = (typeof(*v) __force space *)(v); \
372 }) 438 })
373 439
@@ -500,26 +566,6 @@ extern int rcu_my_thread_group_empty(void);
500#define rcu_dereference_protected(p, c) \ 566#define rcu_dereference_protected(p, c) \
501 __rcu_dereference_protected((p), (c), __rcu) 567 __rcu_dereference_protected((p), (c), __rcu)
502 568
503/**
504 * rcu_dereference_bh_protected() - fetch RCU-bh pointer when updates prevented
505 * @p: The pointer to read, prior to dereferencing
506 * @c: The conditions under which the dereference will take place
507 *
508 * This is the RCU-bh counterpart to rcu_dereference_protected().
509 */
510#define rcu_dereference_bh_protected(p, c) \
511 __rcu_dereference_protected((p), (c), __rcu)
512
513/**
514 * rcu_dereference_sched_protected() - fetch RCU-sched pointer when updates prevented
515 * @p: The pointer to read, prior to dereferencing
516 * @c: The conditions under which the dereference will take place
517 *
518 * This is the RCU-sched counterpart to rcu_dereference_protected().
519 */
520#define rcu_dereference_sched_protected(p, c) \
521 __rcu_dereference_protected((p), (c), __rcu)
522
523 569
524/** 570/**
525 * rcu_dereference() - fetch RCU-protected pointer for dereferencing 571 * rcu_dereference() - fetch RCU-protected pointer for dereferencing
@@ -630,7 +676,7 @@ static inline void rcu_read_unlock(void)
630 */ 676 */
631static inline void rcu_read_lock_bh(void) 677static inline void rcu_read_lock_bh(void)
632{ 678{
633 __rcu_read_lock_bh(); 679 local_bh_disable();
634 __acquire(RCU_BH); 680 __acquire(RCU_BH);
635 rcu_read_acquire_bh(); 681 rcu_read_acquire_bh();
636} 682}
@@ -644,7 +690,7 @@ static inline void rcu_read_unlock_bh(void)
644{ 690{
645 rcu_read_release_bh(); 691 rcu_read_release_bh();
646 __release(RCU_BH); 692 __release(RCU_BH);
647 __rcu_read_unlock_bh(); 693 local_bh_enable();
648} 694}
649 695
650/** 696/**
@@ -698,11 +744,18 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
698 * any prior initialization. Returns the value assigned. 744 * any prior initialization. Returns the value assigned.
699 * 745 *
700 * Inserts memory barriers on architectures that require them 746 * Inserts memory barriers on architectures that require them
701 * (pretty much all of them other than x86), and also prevents 747 * (which is most of them), and also prevents the compiler from
702 * the compiler from reordering the code that initializes the 748 * reordering the code that initializes the structure after the pointer
703 * structure after the pointer assignment. More importantly, this 749 * assignment. More importantly, this call documents which pointers
704 * call documents which pointers will be dereferenced by RCU read-side 750 * will be dereferenced by RCU read-side code.
705 * code. 751 *
752 * In some special cases, you may use RCU_INIT_POINTER() instead
753 * of rcu_assign_pointer(). RCU_INIT_POINTER() is a bit faster due
754 * to the fact that it does not constrain either the CPU or the compiler.
755 * That said, using RCU_INIT_POINTER() when you should have used
756 * rcu_assign_pointer() is a very bad thing that results in
757 * impossible-to-diagnose memory corruption. So please be careful.
758 * See the RCU_INIT_POINTER() comment header for details.
706 */ 759 */
707#define rcu_assign_pointer(p, v) \ 760#define rcu_assign_pointer(p, v) \
708 __rcu_assign_pointer((p), (v), __rcu) 761 __rcu_assign_pointer((p), (v), __rcu)
@@ -710,105 +763,38 @@ static inline notrace void rcu_read_unlock_sched_notrace(void)
710/** 763/**
711 * RCU_INIT_POINTER() - initialize an RCU protected pointer 764 * RCU_INIT_POINTER() - initialize an RCU protected pointer
712 * 765 *
713 * Initialize an RCU-protected pointer in such a way to avoid RCU-lockdep 766 * Initialize an RCU-protected pointer in special cases where readers
714 * splats. 767 * do not need ordering constraints on the CPU or the compiler. These
768 * special cases are:
769 *
770 * 1. This use of RCU_INIT_POINTER() is NULLing out the pointer -or-
771 * 2. The caller has taken whatever steps are required to prevent
772 * RCU readers from concurrently accessing this pointer -or-
773 * 3. The referenced data structure has already been exposed to
774 * readers either at compile time or via rcu_assign_pointer() -and-
775 * a. You have not made -any- reader-visible changes to
776 * this structure since then -or-
777 * b. It is OK for readers accessing this structure from its
778 * new location to see the old state of the structure. (For
779 * example, the changes were to statistical counters or to
780 * other state where exact synchronization is not required.)
781 *
782 * Failure to follow these rules governing use of RCU_INIT_POINTER() will
783 * result in impossible-to-diagnose memory corruption. As in the structures
784 * will look OK in crash dumps, but any concurrent RCU readers might
785 * see pre-initialized values of the referenced data structure. So
786 * please be very careful how you use RCU_INIT_POINTER()!!!
787 *
788 * If you are creating an RCU-protected linked structure that is accessed
789 * by a single external-to-structure RCU-protected pointer, then you may
790 * use RCU_INIT_POINTER() to initialize the internal RCU-protected
791 * pointers, but you must use rcu_assign_pointer() to initialize the
792 * external-to-structure pointer -after- you have completely initialized
793 * the reader-accessible portions of the linked structure.
715 */ 794 */
716#define RCU_INIT_POINTER(p, v) \ 795#define RCU_INIT_POINTER(p, v) \
717 p = (typeof(*v) __force __rcu *)(v) 796 p = (typeof(*v) __force __rcu *)(v)
718 797
719/* Infrastructure to implement the synchronize_() primitives. */
720
721struct rcu_synchronize {
722 struct rcu_head head;
723 struct completion completion;
724};
725
726extern void wakeme_after_rcu(struct rcu_head *head);
727
728#ifdef CONFIG_PREEMPT_RCU
729
730/**
731 * call_rcu() - Queue an RCU callback for invocation after a grace period.
732 * @head: structure to be used for queueing the RCU updates.
733 * @func: actual callback function to be invoked after the grace period
734 *
735 * The callback function will be invoked some time after a full grace
736 * period elapses, in other words after all pre-existing RCU read-side
737 * critical sections have completed. However, the callback function
738 * might well execute concurrently with RCU read-side critical sections
739 * that started after call_rcu() was invoked. RCU read-side critical
740 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
741 * and may be nested.
742 */
743extern void call_rcu(struct rcu_head *head,
744 void (*func)(struct rcu_head *head));
745
746#else /* #ifdef CONFIG_PREEMPT_RCU */
747
748/* In classic RCU, call_rcu() is just call_rcu_sched(). */
749#define call_rcu call_rcu_sched
750
751#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
752
753/**
754 * call_rcu_bh() - Queue an RCU for invocation after a quicker grace period.
755 * @head: structure to be used for queueing the RCU updates.
756 * @func: actual callback function to be invoked after the grace period
757 *
758 * The callback function will be invoked some time after a full grace
759 * period elapses, in other words after all currently executing RCU
760 * read-side critical sections have completed. call_rcu_bh() assumes
761 * that the read-side critical sections end on completion of a softirq
762 * handler. This means that read-side critical sections in process
763 * context must not be interrupted by softirqs. This interface is to be
764 * used when most of the read-side critical sections are in softirq context.
765 * RCU read-side critical sections are delimited by :
766 * - rcu_read_lock() and rcu_read_unlock(), if in interrupt context.
767 * OR
768 * - rcu_read_lock_bh() and rcu_read_unlock_bh(), if in process context.
769 * These may be nested.
770 */
771extern void call_rcu_bh(struct rcu_head *head,
772 void (*func)(struct rcu_head *head));
773
774/*
775 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
776 * by call_rcu() and rcu callback execution, and are therefore not part of the
777 * RCU API. Leaving in rcupdate.h because they are used by all RCU flavors.
778 */
779
780#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
781# define STATE_RCU_HEAD_READY 0
782# define STATE_RCU_HEAD_QUEUED 1
783
784extern struct debug_obj_descr rcuhead_debug_descr;
785
786static inline void debug_rcu_head_queue(struct rcu_head *head)
787{
788 WARN_ON_ONCE((unsigned long)head & 0x3);
789 debug_object_activate(head, &rcuhead_debug_descr);
790 debug_object_active_state(head, &rcuhead_debug_descr,
791 STATE_RCU_HEAD_READY,
792 STATE_RCU_HEAD_QUEUED);
793}
794
795static inline void debug_rcu_head_unqueue(struct rcu_head *head)
796{
797 debug_object_active_state(head, &rcuhead_debug_descr,
798 STATE_RCU_HEAD_QUEUED,
799 STATE_RCU_HEAD_READY);
800 debug_object_deactivate(head, &rcuhead_debug_descr);
801}
802#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
803static inline void debug_rcu_head_queue(struct rcu_head *head)
804{
805}
806
807static inline void debug_rcu_head_unqueue(struct rcu_head *head)
808{
809}
810#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
811
812static __always_inline bool __is_kfree_rcu_offset(unsigned long offset) 798static __always_inline bool __is_kfree_rcu_offset(unsigned long offset)
813{ 799{
814 return offset < 4096; 800 return offset < 4096;
@@ -827,18 +813,6 @@ void __kfree_rcu(struct rcu_head *head, unsigned long offset)
827 call_rcu(head, (rcu_callback)offset); 813 call_rcu(head, (rcu_callback)offset);
828} 814}
829 815
830extern void kfree(const void *);
831
832static inline void __rcu_reclaim(struct rcu_head *head)
833{
834 unsigned long offset = (unsigned long)head->func;
835
836 if (__is_kfree_rcu_offset(offset))
837 kfree((void *)head - offset);
838 else
839 head->func(head);
840}
841
842/** 816/**
843 * kfree_rcu() - kfree an object after a grace period. 817 * kfree_rcu() - kfree an object after a grace period.
844 * @ptr: pointer to kfree 818 * @ptr: pointer to kfree
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 52b3e0281fd0..00b7a5e493d2 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -27,9 +27,23 @@
27 27
28#include <linux/cache.h> 28#include <linux/cache.h>
29 29
30#ifdef CONFIG_RCU_BOOST
30static inline void rcu_init(void) 31static inline void rcu_init(void)
31{ 32{
32} 33}
34#else /* #ifdef CONFIG_RCU_BOOST */
35void rcu_init(void);
36#endif /* #else #ifdef CONFIG_RCU_BOOST */
37
38static inline void rcu_barrier_bh(void)
39{
40 wait_rcu_gp(call_rcu_bh);
41}
42
43static inline void rcu_barrier_sched(void)
44{
45 wait_rcu_gp(call_rcu_sched);
46}
33 47
34#ifdef CONFIG_TINY_RCU 48#ifdef CONFIG_TINY_RCU
35 49
@@ -45,9 +59,13 @@ static inline void rcu_barrier(void)
45 59
46#else /* #ifdef CONFIG_TINY_RCU */ 60#else /* #ifdef CONFIG_TINY_RCU */
47 61
48void rcu_barrier(void);
49void synchronize_rcu_expedited(void); 62void synchronize_rcu_expedited(void);
50 63
64static inline void rcu_barrier(void)
65{
66 wait_rcu_gp(call_rcu);
67}
68
51#endif /* #else #ifdef CONFIG_TINY_RCU */ 69#endif /* #else #ifdef CONFIG_TINY_RCU */
52 70
53static inline void synchronize_rcu_bh(void) 71static inline void synchronize_rcu_bh(void)
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index e65d06634dd8..67458468f1a8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -67,6 +67,8 @@ static inline void synchronize_rcu_bh_expedited(void)
67} 67}
68 68
69extern void rcu_barrier(void); 69extern void rcu_barrier(void);
70extern void rcu_barrier_bh(void);
71extern void rcu_barrier_sched(void);
70 72
71extern unsigned long rcutorture_testseq; 73extern unsigned long rcutorture_testseq;
72extern unsigned long rcutorture_vernum; 74extern unsigned long rcutorture_vernum;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1be699dd32a5..ede8a6585e38 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -270,7 +270,6 @@ extern void init_idle_bootup_task(struct task_struct *idle);
270 270
271extern int runqueue_is_locked(int cpu); 271extern int runqueue_is_locked(int cpu);
272 272
273extern cpumask_var_t nohz_cpu_mask;
274#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ) 273#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
275extern void select_nohz_load_balancer(int stop_tick); 274extern void select_nohz_load_balancer(int stop_tick);
276extern int get_nohz_timer_target(void); 275extern int get_nohz_timer_target(void);
@@ -1260,9 +1259,6 @@ struct task_struct {
1260#ifdef CONFIG_PREEMPT_RCU 1259#ifdef CONFIG_PREEMPT_RCU
1261 int rcu_read_lock_nesting; 1260 int rcu_read_lock_nesting;
1262 char rcu_read_unlock_special; 1261 char rcu_read_unlock_special;
1263#if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU)
1264 int rcu_boosted;
1265#endif /* #if defined(CONFIG_RCU_BOOST) && defined(CONFIG_TREE_PREEMPT_RCU) */
1266 struct list_head rcu_node_entry; 1262 struct list_head rcu_node_entry;
1267#endif /* #ifdef CONFIG_PREEMPT_RCU */ 1263#endif /* #ifdef CONFIG_PREEMPT_RCU */
1268#ifdef CONFIG_TREE_PREEMPT_RCU 1264#ifdef CONFIG_TREE_PREEMPT_RCU
diff --git a/include/linux/types.h b/include/linux/types.h
index 176da8c1fbb1..57a97234bec1 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -238,6 +238,16 @@ struct ustat {
238 char f_fpack[6]; 238 char f_fpack[6];
239}; 239};
240 240
241/**
242 * struct rcu_head - callback structure for use with RCU
243 * @next: next update requests in a list
244 * @func: actual update function to call after the grace period.
245 */
246struct rcu_head {
247 struct rcu_head *next;
248 void (*func)(struct rcu_head *head);
249};
250
241#endif /* __KERNEL__ */ 251#endif /* __KERNEL__ */
242#endif /* __ASSEMBLY__ */ 252#endif /* __ASSEMBLY__ */
243#endif /* _LINUX_TYPES_H */ 253#endif /* _LINUX_TYPES_H */
diff --git a/include/trace/events/rcu.h b/include/trace/events/rcu.h
new file mode 100644
index 000000000000..669fbd62ec25
--- /dev/null
+++ b/include/trace/events/rcu.h
@@ -0,0 +1,459 @@
1#undef TRACE_SYSTEM
2#define TRACE_SYSTEM rcu
3
4#if !defined(_TRACE_RCU_H) || defined(TRACE_HEADER_MULTI_READ)
5#define _TRACE_RCU_H
6
7#include <linux/tracepoint.h>
8
9/*
10 * Tracepoint for start/end markers used for utilization calculations.
11 * By convention, the string is of the following forms:
12 *
13 * "Start <activity>" -- Mark the start of the specified activity,
14 * such as "context switch". Nesting is permitted.
15 * "End <activity>" -- Mark the end of the specified activity.
16 *
17 * An "@" character within "<activity>" is a comment character: Data
18 * reduction scripts will ignore the "@" and the remainder of the line.
19 */
20TRACE_EVENT(rcu_utilization,
21
22 TP_PROTO(char *s),
23
24 TP_ARGS(s),
25
26 TP_STRUCT__entry(
27 __field(char *, s)
28 ),
29
30 TP_fast_assign(
31 __entry->s = s;
32 ),
33
34 TP_printk("%s", __entry->s)
35);
36
37#ifdef CONFIG_RCU_TRACE
38
39#if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
40
41/*
42 * Tracepoint for grace-period events: starting and ending a grace
43 * period ("start" and "end", respectively), a CPU noting the start
44 * of a new grace period or the end of an old grace period ("cpustart"
45 * and "cpuend", respectively), a CPU passing through a quiescent
46 * state ("cpuqs"), a CPU coming online or going offline ("cpuonl"
47 * and "cpuofl", respectively), and a CPU being kicked for being too
48 * long in dyntick-idle mode ("kick").
49 */
50TRACE_EVENT(rcu_grace_period,
51
52 TP_PROTO(char *rcuname, unsigned long gpnum, char *gpevent),
53
54 TP_ARGS(rcuname, gpnum, gpevent),
55
56 TP_STRUCT__entry(
57 __field(char *, rcuname)
58 __field(unsigned long, gpnum)
59 __field(char *, gpevent)
60 ),
61
62 TP_fast_assign(
63 __entry->rcuname = rcuname;
64 __entry->gpnum = gpnum;
65 __entry->gpevent = gpevent;
66 ),
67
68 TP_printk("%s %lu %s",
69 __entry->rcuname, __entry->gpnum, __entry->gpevent)
70);
71
72/*
73 * Tracepoint for grace-period-initialization events. These are
74 * distinguished by the type of RCU, the new grace-period number, the
75 * rcu_node structure level, the starting and ending CPU covered by the
76 * rcu_node structure, and the mask of CPUs that will be waited for.
77 * All but the type of RCU are extracted from the rcu_node structure.
78 */
79TRACE_EVENT(rcu_grace_period_init,
80
81 TP_PROTO(char *rcuname, unsigned long gpnum, u8 level,
82 int grplo, int grphi, unsigned long qsmask),
83
84 TP_ARGS(rcuname, gpnum, level, grplo, grphi, qsmask),
85
86 TP_STRUCT__entry(
87 __field(char *, rcuname)
88 __field(unsigned long, gpnum)
89 __field(u8, level)
90 __field(int, grplo)
91 __field(int, grphi)
92 __field(unsigned long, qsmask)
93 ),
94
95 TP_fast_assign(
96 __entry->rcuname = rcuname;
97 __entry->gpnum = gpnum;
98 __entry->level = level;
99 __entry->grplo = grplo;
100 __entry->grphi = grphi;
101 __entry->qsmask = qsmask;
102 ),
103
104 TP_printk("%s %lu %u %d %d %lx",
105 __entry->rcuname, __entry->gpnum, __entry->level,
106 __entry->grplo, __entry->grphi, __entry->qsmask)
107);
108
109/*
110 * Tracepoint for tasks blocking within preemptible-RCU read-side
111 * critical sections. Track the type of RCU (which one day might
112 * include SRCU), the grace-period number that the task is blocking
113 * (the current or the next), and the task's PID.
114 */
115TRACE_EVENT(rcu_preempt_task,
116
117 TP_PROTO(char *rcuname, int pid, unsigned long gpnum),
118
119 TP_ARGS(rcuname, pid, gpnum),
120
121 TP_STRUCT__entry(
122 __field(char *, rcuname)
123 __field(unsigned long, gpnum)
124 __field(int, pid)
125 ),
126
127 TP_fast_assign(
128 __entry->rcuname = rcuname;
129 __entry->gpnum = gpnum;
130 __entry->pid = pid;
131 ),
132
133 TP_printk("%s %lu %d",
134 __entry->rcuname, __entry->gpnum, __entry->pid)
135);
136
137/*
138 * Tracepoint for tasks that blocked within a given preemptible-RCU
139 * read-side critical section exiting that critical section. Track the
140 * type of RCU (which one day might include SRCU) and the task's PID.
141 */
142TRACE_EVENT(rcu_unlock_preempted_task,
143
144 TP_PROTO(char *rcuname, unsigned long gpnum, int pid),
145
146 TP_ARGS(rcuname, gpnum, pid),
147
148 TP_STRUCT__entry(
149 __field(char *, rcuname)
150 __field(unsigned long, gpnum)
151 __field(int, pid)
152 ),
153
154 TP_fast_assign(
155 __entry->rcuname = rcuname;
156 __entry->gpnum = gpnum;
157 __entry->pid = pid;
158 ),
159
160 TP_printk("%s %lu %d", __entry->rcuname, __entry->gpnum, __entry->pid)
161);
162
163/*
164 * Tracepoint for quiescent-state-reporting events. These are
165 * distinguished by the type of RCU, the grace-period number, the
166 * mask of quiescent lower-level entities, the rcu_node structure level,
167 * the starting and ending CPU covered by the rcu_node structure, and
168 * whether there are any blocked tasks blocking the current grace period.
169 * All but the type of RCU are extracted from the rcu_node structure.
170 */
171TRACE_EVENT(rcu_quiescent_state_report,
172
173 TP_PROTO(char *rcuname, unsigned long gpnum,
174 unsigned long mask, unsigned long qsmask,
175 u8 level, int grplo, int grphi, int gp_tasks),
176
177 TP_ARGS(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks),
178
179 TP_STRUCT__entry(
180 __field(char *, rcuname)
181 __field(unsigned long, gpnum)
182 __field(unsigned long, mask)
183 __field(unsigned long, qsmask)
184 __field(u8, level)
185 __field(int, grplo)
186 __field(int, grphi)
187 __field(u8, gp_tasks)
188 ),
189
190 TP_fast_assign(
191 __entry->rcuname = rcuname;
192 __entry->gpnum = gpnum;
193 __entry->mask = mask;
194 __entry->qsmask = qsmask;
195 __entry->level = level;
196 __entry->grplo = grplo;
197 __entry->grphi = grphi;
198 __entry->gp_tasks = gp_tasks;
199 ),
200
201 TP_printk("%s %lu %lx>%lx %u %d %d %u",
202 __entry->rcuname, __entry->gpnum,
203 __entry->mask, __entry->qsmask, __entry->level,
204 __entry->grplo, __entry->grphi, __entry->gp_tasks)
205);
206
207/*
208 * Tracepoint for quiescent states detected by force_quiescent_state().
209 * These trace events include the type of RCU, the grace-period number
210 * that was blocked by the CPU, the CPU itself, and the type of quiescent
211 * state, which can be "dti" for dyntick-idle mode, "ofl" for CPU offline,
212 * or "kick" when kicking a CPU that has been in dyntick-idle mode for
213 * too long.
214 */
215TRACE_EVENT(rcu_fqs,
216
217 TP_PROTO(char *rcuname, unsigned long gpnum, int cpu, char *qsevent),
218
219 TP_ARGS(rcuname, gpnum, cpu, qsevent),
220
221 TP_STRUCT__entry(
222 __field(char *, rcuname)
223 __field(unsigned long, gpnum)
224 __field(int, cpu)
225 __field(char *, qsevent)
226 ),
227
228 TP_fast_assign(
229 __entry->rcuname = rcuname;
230 __entry->gpnum = gpnum;
231 __entry->cpu = cpu;
232 __entry->qsevent = qsevent;
233 ),
234
235 TP_printk("%s %lu %d %s",
236 __entry->rcuname, __entry->gpnum,
237 __entry->cpu, __entry->qsevent)
238);
239
240#endif /* #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU) */
241
242/*
243 * Tracepoint for dyntick-idle entry/exit events. These take a string
244 * as argument: "Start" for entering dyntick-idle mode and "End" for
245 * leaving it.
246 */
247TRACE_EVENT(rcu_dyntick,
248
249 TP_PROTO(char *polarity),
250
251 TP_ARGS(polarity),
252
253 TP_STRUCT__entry(
254 __field(char *, polarity)
255 ),
256
257 TP_fast_assign(
258 __entry->polarity = polarity;
259 ),
260
261 TP_printk("%s", __entry->polarity)
262);
263
264/*
265 * Tracepoint for the registration of a single RCU callback function.
266 * The first argument is the type of RCU, the second argument is
267 * a pointer to the RCU callback itself, and the third element is the
268 * new RCU callback queue length for the current CPU.
269 */
270TRACE_EVENT(rcu_callback,
271
272 TP_PROTO(char *rcuname, struct rcu_head *rhp, long qlen),
273
274 TP_ARGS(rcuname, rhp, qlen),
275
276 TP_STRUCT__entry(
277 __field(char *, rcuname)
278 __field(void *, rhp)
279 __field(void *, func)
280 __field(long, qlen)
281 ),
282
283 TP_fast_assign(
284 __entry->rcuname = rcuname;
285 __entry->rhp = rhp;
286 __entry->func = rhp->func;
287 __entry->qlen = qlen;
288 ),
289
290 TP_printk("%s rhp=%p func=%pf %ld",
291 __entry->rcuname, __entry->rhp, __entry->func, __entry->qlen)
292);
293
294/*
295 * Tracepoint for the registration of a single RCU callback of the special
296 * kfree() form. The first argument is the RCU type, the second argument
297 * is a pointer to the RCU callback, the third argument is the offset
298 * of the callback within the enclosing RCU-protected data structure,
299 * and the fourth argument is the new RCU callback queue length for the
300 * current CPU.
301 */
302TRACE_EVENT(rcu_kfree_callback,
303
304 TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset,
305 long qlen),
306
307 TP_ARGS(rcuname, rhp, offset, qlen),
308
309 TP_STRUCT__entry(
310 __field(char *, rcuname)
311 __field(void *, rhp)
312 __field(unsigned long, offset)
313 __field(long, qlen)
314 ),
315
316 TP_fast_assign(
317 __entry->rcuname = rcuname;
318 __entry->rhp = rhp;
319 __entry->offset = offset;
320 __entry->qlen = qlen;
321 ),
322
323 TP_printk("%s rhp=%p func=%ld %ld",
324 __entry->rcuname, __entry->rhp, __entry->offset,
325 __entry->qlen)
326);
327
328/*
329 * Tracepoint for marking the beginning of rcu_do_batch, performed to start
330 * RCU callback invocation. The first argument is the RCU flavor,
331 * the second is the total number of callbacks (including those that
332 * are not yet ready to be invoked), and the third argument is the
333 * current RCU-callback batch limit.
334 */
335TRACE_EVENT(rcu_batch_start,
336
337 TP_PROTO(char *rcuname, long qlen, int blimit),
338
339 TP_ARGS(rcuname, qlen, blimit),
340
341 TP_STRUCT__entry(
342 __field(char *, rcuname)
343 __field(long, qlen)
344 __field(int, blimit)
345 ),
346
347 TP_fast_assign(
348 __entry->rcuname = rcuname;
349 __entry->qlen = qlen;
350 __entry->blimit = blimit;
351 ),
352
353 TP_printk("%s CBs=%ld bl=%d",
354 __entry->rcuname, __entry->qlen, __entry->blimit)
355);
356
357/*
358 * Tracepoint for the invocation of a single RCU callback function.
359 * The first argument is the type of RCU, and the second argument is
360 * a pointer to the RCU callback itself.
361 */
362TRACE_EVENT(rcu_invoke_callback,
363
364 TP_PROTO(char *rcuname, struct rcu_head *rhp),
365
366 TP_ARGS(rcuname, rhp),
367
368 TP_STRUCT__entry(
369 __field(char *, rcuname)
370 __field(void *, rhp)
371 __field(void *, func)
372 ),
373
374 TP_fast_assign(
375 __entry->rcuname = rcuname;
376 __entry->rhp = rhp;
377 __entry->func = rhp->func;
378 ),
379
380 TP_printk("%s rhp=%p func=%pf",
381 __entry->rcuname, __entry->rhp, __entry->func)
382);
383
384/*
385 * Tracepoint for the invocation of a single RCU callback of the special
386 * kfree() form. The first argument is the RCU flavor, the second
387 * argument is a pointer to the RCU callback, and the third argument
388 * is the offset of the callback within the enclosing RCU-protected
389 * data structure.
390 */
391TRACE_EVENT(rcu_invoke_kfree_callback,
392
393 TP_PROTO(char *rcuname, struct rcu_head *rhp, unsigned long offset),
394
395 TP_ARGS(rcuname, rhp, offset),
396
397 TP_STRUCT__entry(
398 __field(char *, rcuname)
399 __field(void *, rhp)
400 __field(unsigned long, offset)
401 ),
402
403 TP_fast_assign(
404 __entry->rcuname = rcuname;
405 __entry->rhp = rhp;
406 __entry->offset = offset;
407 ),
408
409 TP_printk("%s rhp=%p func=%ld",
410 __entry->rcuname, __entry->rhp, __entry->offset)
411);
412
413/*
414 * Tracepoint for exiting rcu_do_batch after RCU callbacks have been
415 * invoked. The first argument is the name of the RCU flavor and
416 * the second argument is the number of callbacks actually invoked.
417 */
418TRACE_EVENT(rcu_batch_end,
419
420 TP_PROTO(char *rcuname, int callbacks_invoked),
421
422 TP_ARGS(rcuname, callbacks_invoked),
423
424 TP_STRUCT__entry(
425 __field(char *, rcuname)
426 __field(int, callbacks_invoked)
427 ),
428
429 TP_fast_assign(
430 __entry->rcuname = rcuname;
431 __entry->callbacks_invoked = callbacks_invoked;
432 ),
433
434 TP_printk("%s CBs-invoked=%d",
435 __entry->rcuname, __entry->callbacks_invoked)
436);
437
438#else /* #ifdef CONFIG_RCU_TRACE */
439
440#define trace_rcu_grace_period(rcuname, gpnum, gpevent) do { } while (0)
441#define trace_rcu_grace_period_init(rcuname, gpnum, level, grplo, grphi, qsmask) do { } while (0)
442#define trace_rcu_preempt_task(rcuname, pid, gpnum) do { } while (0)
443#define trace_rcu_unlock_preempted_task(rcuname, gpnum, pid) do { } while (0)
444#define trace_rcu_quiescent_state_report(rcuname, gpnum, mask, qsmask, level, grplo, grphi, gp_tasks) do { } while (0)
445#define trace_rcu_fqs(rcuname, gpnum, cpu, qsevent) do { } while (0)
446#define trace_rcu_dyntick(polarity) do { } while (0)
447#define trace_rcu_callback(rcuname, rhp, qlen) do { } while (0)
448#define trace_rcu_kfree_callback(rcuname, rhp, offset, qlen) do { } while (0)
449#define trace_rcu_batch_start(rcuname, qlen, blimit) do { } while (0)
450#define trace_rcu_invoke_callback(rcuname, rhp) do { } while (0)
451#define trace_rcu_invoke_kfree_callback(rcuname, rhp, offset) do { } while (0)
452#define trace_rcu_batch_end(rcuname, callbacks_invoked) do { } while (0)
453
454#endif /* #else #ifdef CONFIG_RCU_TRACE */
455
456#endif /* _TRACE_RCU_H */
457
458/* This part must be outside protection */
459#include <trace/define_trace.h>
diff --git a/init/Kconfig b/init/Kconfig
index d62778390e55..dc7e27bf89a8 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -391,7 +391,7 @@ config TREE_RCU
391 391
392config TREE_PREEMPT_RCU 392config TREE_PREEMPT_RCU
393 bool "Preemptible tree-based hierarchical RCU" 393 bool "Preemptible tree-based hierarchical RCU"
394 depends on PREEMPT 394 depends on PREEMPT && SMP
395 help 395 help
396 This option selects the RCU implementation that is 396 This option selects the RCU implementation that is
397 designed for very large SMP systems with hundreds or 397 designed for very large SMP systems with hundreds or
@@ -401,7 +401,7 @@ config TREE_PREEMPT_RCU
401 401
402config TINY_RCU 402config TINY_RCU
403 bool "UP-only small-memory-footprint RCU" 403 bool "UP-only small-memory-footprint RCU"
404 depends on !SMP 404 depends on !PREEMPT && !SMP
405 help 405 help
406 This option selects the RCU implementation that is 406 This option selects the RCU implementation that is
407 designed for UP systems from which real-time response 407 designed for UP systems from which real-time response
@@ -410,7 +410,7 @@ config TINY_RCU
410 410
411config TINY_PREEMPT_RCU 411config TINY_PREEMPT_RCU
412 bool "Preemptible UP-only small-memory-footprint RCU" 412 bool "Preemptible UP-only small-memory-footprint RCU"
413 depends on !SMP && PREEMPT 413 depends on PREEMPT && !SMP
414 help 414 help
415 This option selects the RCU implementation that is designed 415 This option selects the RCU implementation that is designed
416 for real-time UP systems. This option greatly reduces the 416 for real-time UP systems. This option greatly reduces the
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index c081fa967c8f..e69434b070da 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -1145,10 +1145,11 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth,
1145 if (debug_locks_silent) 1145 if (debug_locks_silent)
1146 return 0; 1146 return 0;
1147 1147
1148 printk("\n=======================================================\n"); 1148 printk("\n");
1149 printk( "[ INFO: possible circular locking dependency detected ]\n"); 1149 printk("======================================================\n");
1150 printk("[ INFO: possible circular locking dependency detected ]\n");
1150 print_kernel_version(); 1151 print_kernel_version();
1151 printk( "-------------------------------------------------------\n"); 1152 printk("-------------------------------------------------------\n");
1152 printk("%s/%d is trying to acquire lock:\n", 1153 printk("%s/%d is trying to acquire lock:\n",
1153 curr->comm, task_pid_nr(curr)); 1154 curr->comm, task_pid_nr(curr));
1154 print_lock(check_src); 1155 print_lock(check_src);
@@ -1482,11 +1483,12 @@ print_bad_irq_dependency(struct task_struct *curr,
1482 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1483 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1483 return 0; 1484 return 0;
1484 1485
1485 printk("\n======================================================\n"); 1486 printk("\n");
1486 printk( "[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", 1487 printk("======================================================\n");
1488 printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n",
1487 irqclass, irqclass); 1489 irqclass, irqclass);
1488 print_kernel_version(); 1490 print_kernel_version();
1489 printk( "------------------------------------------------------\n"); 1491 printk("------------------------------------------------------\n");
1490 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", 1492 printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n",
1491 curr->comm, task_pid_nr(curr), 1493 curr->comm, task_pid_nr(curr),
1492 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, 1494 curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT,
@@ -1711,10 +1713,11 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev,
1711 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 1713 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
1712 return 0; 1714 return 0;
1713 1715
1714 printk("\n=============================================\n"); 1716 printk("\n");
1715 printk( "[ INFO: possible recursive locking detected ]\n"); 1717 printk("=============================================\n");
1718 printk("[ INFO: possible recursive locking detected ]\n");
1716 print_kernel_version(); 1719 print_kernel_version();
1717 printk( "---------------------------------------------\n"); 1720 printk("---------------------------------------------\n");
1718 printk("%s/%d is trying to acquire lock:\n", 1721 printk("%s/%d is trying to acquire lock:\n",
1719 curr->comm, task_pid_nr(curr)); 1722 curr->comm, task_pid_nr(curr));
1720 print_lock(next); 1723 print_lock(next);
@@ -2217,10 +2220,11 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this,
2217 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2220 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2218 return 0; 2221 return 0;
2219 2222
2220 printk("\n=================================\n"); 2223 printk("\n");
2221 printk( "[ INFO: inconsistent lock state ]\n"); 2224 printk("=================================\n");
2225 printk("[ INFO: inconsistent lock state ]\n");
2222 print_kernel_version(); 2226 print_kernel_version();
2223 printk( "---------------------------------\n"); 2227 printk("---------------------------------\n");
2224 2228
2225 printk("inconsistent {%s} -> {%s} usage.\n", 2229 printk("inconsistent {%s} -> {%s} usage.\n",
2226 usage_str[prev_bit], usage_str[new_bit]); 2230 usage_str[prev_bit], usage_str[new_bit]);
@@ -2281,10 +2285,11 @@ print_irq_inversion_bug(struct task_struct *curr,
2281 if (!debug_locks_off_graph_unlock() || debug_locks_silent) 2285 if (!debug_locks_off_graph_unlock() || debug_locks_silent)
2282 return 0; 2286 return 0;
2283 2287
2284 printk("\n=========================================================\n"); 2288 printk("\n");
2285 printk( "[ INFO: possible irq lock inversion dependency detected ]\n"); 2289 printk("=========================================================\n");
2290 printk("[ INFO: possible irq lock inversion dependency detected ]\n");
2286 print_kernel_version(); 2291 print_kernel_version();
2287 printk( "---------------------------------------------------------\n"); 2292 printk("---------------------------------------------------------\n");
2288 printk("%s/%d just changed the state of lock:\n", 2293 printk("%s/%d just changed the state of lock:\n",
2289 curr->comm, task_pid_nr(curr)); 2294 curr->comm, task_pid_nr(curr));
2290 print_lock(this); 2295 print_lock(this);
@@ -3161,9 +3166,10 @@ print_unlock_inbalance_bug(struct task_struct *curr, struct lockdep_map *lock,
3161 if (debug_locks_silent) 3166 if (debug_locks_silent)
3162 return 0; 3167 return 0;
3163 3168
3164 printk("\n=====================================\n"); 3169 printk("\n");
3165 printk( "[ BUG: bad unlock balance detected! ]\n"); 3170 printk("=====================================\n");
3166 printk( "-------------------------------------\n"); 3171 printk("[ BUG: bad unlock balance detected! ]\n");
3172 printk("-------------------------------------\n");
3167 printk("%s/%d is trying to release lock (", 3173 printk("%s/%d is trying to release lock (",
3168 curr->comm, task_pid_nr(curr)); 3174 curr->comm, task_pid_nr(curr));
3169 print_lockdep_cache(lock); 3175 print_lockdep_cache(lock);
@@ -3604,9 +3610,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock,
3604 if (debug_locks_silent) 3610 if (debug_locks_silent)
3605 return 0; 3611 return 0;
3606 3612
3607 printk("\n=================================\n"); 3613 printk("\n");
3608 printk( "[ BUG: bad contention detected! ]\n"); 3614 printk("=================================\n");
3609 printk( "---------------------------------\n"); 3615 printk("[ BUG: bad contention detected! ]\n");
3616 printk("---------------------------------\n");
3610 printk("%s/%d is trying to contend lock (", 3617 printk("%s/%d is trying to contend lock (",
3611 curr->comm, task_pid_nr(curr)); 3618 curr->comm, task_pid_nr(curr));
3612 print_lockdep_cache(lock); 3619 print_lockdep_cache(lock);
@@ -3977,9 +3984,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from,
3977 if (debug_locks_silent) 3984 if (debug_locks_silent)
3978 return; 3985 return;
3979 3986
3980 printk("\n=========================\n"); 3987 printk("\n");
3981 printk( "[ BUG: held lock freed! ]\n"); 3988 printk("=========================\n");
3982 printk( "-------------------------\n"); 3989 printk("[ BUG: held lock freed! ]\n");
3990 printk("-------------------------\n");
3983 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", 3991 printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n",
3984 curr->comm, task_pid_nr(curr), mem_from, mem_to-1); 3992 curr->comm, task_pid_nr(curr), mem_from, mem_to-1);
3985 print_lock(hlock); 3993 print_lock(hlock);
@@ -4033,9 +4041,10 @@ static void print_held_locks_bug(struct task_struct *curr)
4033 if (debug_locks_silent) 4041 if (debug_locks_silent)
4034 return; 4042 return;
4035 4043
4036 printk("\n=====================================\n"); 4044 printk("\n");
4037 printk( "[ BUG: lock held at task exit time! ]\n"); 4045 printk("=====================================\n");
4038 printk( "-------------------------------------\n"); 4046 printk("[ BUG: lock held at task exit time! ]\n");
4047 printk("-------------------------------------\n");
4039 printk("%s/%d is exiting with locks still held!\n", 4048 printk("%s/%d is exiting with locks still held!\n",
4040 curr->comm, task_pid_nr(curr)); 4049 curr->comm, task_pid_nr(curr));
4041 lockdep_print_held_locks(curr); 4050 lockdep_print_held_locks(curr);
@@ -4129,16 +4138,17 @@ void lockdep_sys_exit(void)
4129 if (unlikely(curr->lockdep_depth)) { 4138 if (unlikely(curr->lockdep_depth)) {
4130 if (!debug_locks_off()) 4139 if (!debug_locks_off())
4131 return; 4140 return;
4132 printk("\n================================================\n"); 4141 printk("\n");
4133 printk( "[ BUG: lock held when returning to user space! ]\n"); 4142 printk("================================================\n");
4134 printk( "------------------------------------------------\n"); 4143 printk("[ BUG: lock held when returning to user space! ]\n");
4144 printk("------------------------------------------------\n");
4135 printk("%s/%d is leaving the kernel with locks still held!\n", 4145 printk("%s/%d is leaving the kernel with locks still held!\n",
4136 curr->comm, curr->pid); 4146 curr->comm, curr->pid);
4137 lockdep_print_held_locks(curr); 4147 lockdep_print_held_locks(curr);
4138 } 4148 }
4139} 4149}
4140 4150
4141void lockdep_rcu_dereference(const char *file, const int line) 4151void lockdep_rcu_suspicious(const char *file, const int line, const char *s)
4142{ 4152{
4143 struct task_struct *curr = current; 4153 struct task_struct *curr = current;
4144 4154
@@ -4147,15 +4157,15 @@ void lockdep_rcu_dereference(const char *file, const int line)
4147 return; 4157 return;
4148#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ 4158#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
4149 /* Note: the following can be executed concurrently, so be careful. */ 4159 /* Note: the following can be executed concurrently, so be careful. */
4150 printk("\n===================================================\n"); 4160 printk("\n");
4151 printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n"); 4161 printk("===============================\n");
4152 printk( "---------------------------------------------------\n"); 4162 printk("[ INFO: suspicious RCU usage. ]\n");
4153 printk("%s:%d invoked rcu_dereference_check() without protection!\n", 4163 printk("-------------------------------\n");
4154 file, line); 4164 printk("%s:%d %s!\n", file, line, s);
4155 printk("\nother info that might help us debug this:\n\n"); 4165 printk("\nother info that might help us debug this:\n\n");
4156 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks); 4166 printk("\nrcu_scheduler_active = %d, debug_locks = %d\n", rcu_scheduler_active, debug_locks);
4157 lockdep_print_held_locks(curr); 4167 lockdep_print_held_locks(curr);
4158 printk("\nstack backtrace:\n"); 4168 printk("\nstack backtrace:\n");
4159 dump_stack(); 4169 dump_stack();
4160} 4170}
4161EXPORT_SYMBOL_GPL(lockdep_rcu_dereference); 4171EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious);
diff --git a/kernel/pid.c b/kernel/pid.c
index e432057f3b21..8cafe7e72ad2 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -418,7 +418,9 @@ EXPORT_SYMBOL(pid_task);
418 */ 418 */
419struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns) 419struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
420{ 420{
421 rcu_lockdep_assert(rcu_read_lock_held()); 421 rcu_lockdep_assert(rcu_read_lock_held(),
422 "find_task_by_pid_ns() needs rcu_read_lock()"
423 " protection");
422 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID); 424 return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
423} 425}
424 426
diff --git a/kernel/rcu.h b/kernel/rcu.h
new file mode 100644
index 000000000000..f600868d550d
--- /dev/null
+++ b/kernel/rcu.h
@@ -0,0 +1,85 @@
1/*
2 * Read-Copy Update definitions shared among RCU implementations.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 *
18 * Copyright IBM Corporation, 2011
19 *
20 * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
21 */
22
23#ifndef __LINUX_RCU_H
24#define __LINUX_RCU_H
25
26#ifdef CONFIG_RCU_TRACE
27#define RCU_TRACE(stmt) stmt
28#else /* #ifdef CONFIG_RCU_TRACE */
29#define RCU_TRACE(stmt)
30#endif /* #else #ifdef CONFIG_RCU_TRACE */
31
32/*
33 * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally
34 * by call_rcu() and rcu callback execution, and are therefore not part of the
35 * RCU API. Kept in this shared header because all RCU flavors use them.
36 */
37
38#ifdef CONFIG_DEBUG_OBJECTS_RCU_HEAD
39# define STATE_RCU_HEAD_READY 0
40# define STATE_RCU_HEAD_QUEUED 1
41
42extern struct debug_obj_descr rcuhead_debug_descr;
43
44static inline void debug_rcu_head_queue(struct rcu_head *head)
45{
46 WARN_ON_ONCE((unsigned long)head & 0x3);
47 debug_object_activate(head, &rcuhead_debug_descr);
48 debug_object_active_state(head, &rcuhead_debug_descr,
49 STATE_RCU_HEAD_READY,
50 STATE_RCU_HEAD_QUEUED);
51}
52
53static inline void debug_rcu_head_unqueue(struct rcu_head *head)
54{
55 debug_object_active_state(head, &rcuhead_debug_descr,
56 STATE_RCU_HEAD_QUEUED,
57 STATE_RCU_HEAD_READY);
58 debug_object_deactivate(head, &rcuhead_debug_descr);
59}
60#else /* !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
61static inline void debug_rcu_head_queue(struct rcu_head *head)
62{
63}
64
65static inline void debug_rcu_head_unqueue(struct rcu_head *head)
66{
67}
68#endif /* #else !CONFIG_DEBUG_OBJECTS_RCU_HEAD */
69
70extern void kfree(const void *);
71
72static inline void __rcu_reclaim(char *rn, struct rcu_head *head)
73{
74 unsigned long offset = (unsigned long)head->func;
75
76 if (__is_kfree_rcu_offset(offset)) {
77 RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset));
78 kfree((void *)head - offset);
79 } else {
80 RCU_TRACE(trace_rcu_invoke_callback(rn, head));
81 head->func(head);
82 }
83}
84
85#endif /* __LINUX_RCU_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index ddddb320be61..ca0d23b6b3e8 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -46,6 +46,11 @@
46#include <linux/module.h> 46#include <linux/module.h>
47#include <linux/hardirq.h> 47#include <linux/hardirq.h>
48 48
49#define CREATE_TRACE_POINTS
50#include <trace/events/rcu.h>
51
52#include "rcu.h"
53
49#ifdef CONFIG_DEBUG_LOCK_ALLOC 54#ifdef CONFIG_DEBUG_LOCK_ALLOC
50static struct lock_class_key rcu_lock_key; 55static struct lock_class_key rcu_lock_key;
51struct lockdep_map rcu_lock_map = 56struct lockdep_map rcu_lock_map =
@@ -94,11 +99,16 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
94 99
95#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 100#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
96 101
102struct rcu_synchronize {
103 struct rcu_head head;
104 struct completion completion;
105};
106
97/* 107/*
98 * Awaken the corresponding synchronize_rcu() instance now that a 108 * Awaken the corresponding synchronize_rcu() instance now that a
99 * grace period has elapsed. 109 * grace period has elapsed.
100 */ 110 */
101void wakeme_after_rcu(struct rcu_head *head) 111static void wakeme_after_rcu(struct rcu_head *head)
102{ 112{
103 struct rcu_synchronize *rcu; 113 struct rcu_synchronize *rcu;
104 114
@@ -106,6 +116,20 @@ void wakeme_after_rcu(struct rcu_head *head)
106 complete(&rcu->completion); 116 complete(&rcu->completion);
107} 117}
108 118
119void wait_rcu_gp(call_rcu_func_t crf)
120{
121 struct rcu_synchronize rcu;
122
123 init_rcu_head_on_stack(&rcu.head);
124 init_completion(&rcu.completion);
125 /* Will wake me after RCU finished. */
126 crf(&rcu.head, wakeme_after_rcu);
127 /* Wait for it. */
128 wait_for_completion(&rcu.completion);
129 destroy_rcu_head_on_stack(&rcu.head);
130}
131EXPORT_SYMBOL_GPL(wait_rcu_gp);
132
109#ifdef CONFIG_PROVE_RCU 133#ifdef CONFIG_PROVE_RCU
110/* 134/*
111 * wrapper function to avoid #include problems. 135 * wrapper function to avoid #include problems.
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 7bbac7d0f5ab..da775c87f27f 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -37,16 +37,17 @@
37#include <linux/cpu.h> 37#include <linux/cpu.h>
38#include <linux/prefetch.h> 38#include <linux/prefetch.h>
39 39
40/* Controls for rcu_kthread() kthread, replacing RCU_SOFTIRQ used previously. */ 40#ifdef CONFIG_RCU_TRACE
41static struct task_struct *rcu_kthread_task; 41#include <trace/events/rcu.h>
42static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq); 42#endif /* #ifdef CONFIG_RCU_TRACE */
43static unsigned long have_rcu_kthread_work; 43
44#include "rcu.h"
44 45
45/* Forward declarations for rcutiny_plugin.h. */ 46/* Forward declarations for rcutiny_plugin.h. */
46struct rcu_ctrlblk; 47struct rcu_ctrlblk;
47static void invoke_rcu_kthread(void); 48static void invoke_rcu_callbacks(void);
48static void rcu_process_callbacks(struct rcu_ctrlblk *rcp); 49static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp);
49static int rcu_kthread(void *arg); 50static void rcu_process_callbacks(struct softirq_action *unused);
50static void __call_rcu(struct rcu_head *head, 51static void __call_rcu(struct rcu_head *head,
51 void (*func)(struct rcu_head *rcu), 52 void (*func)(struct rcu_head *rcu),
52 struct rcu_ctrlblk *rcp); 53 struct rcu_ctrlblk *rcp);
@@ -96,16 +97,6 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp)
96} 97}
97 98
98/* 99/*
99 * Wake up rcu_kthread() to process callbacks now eligible for invocation
100 * or to boost readers.
101 */
102static void invoke_rcu_kthread(void)
103{
104 have_rcu_kthread_work = 1;
105 wake_up(&rcu_kthread_wq);
106}
107
108/*
109 * Record an rcu quiescent state. And an rcu_bh quiescent state while we 100 * Record an rcu quiescent state. And an rcu_bh quiescent state while we
110 * are at it, given that any rcu quiescent state is also an rcu_bh 101 * are at it, given that any rcu quiescent state is also an rcu_bh
111 * quiescent state. Use "+" instead of "||" to defeat short circuiting. 102 * quiescent state. Use "+" instead of "||" to defeat short circuiting.
@@ -117,7 +108,7 @@ void rcu_sched_qs(int cpu)
117 local_irq_save(flags); 108 local_irq_save(flags);
118 if (rcu_qsctr_help(&rcu_sched_ctrlblk) + 109 if (rcu_qsctr_help(&rcu_sched_ctrlblk) +
119 rcu_qsctr_help(&rcu_bh_ctrlblk)) 110 rcu_qsctr_help(&rcu_bh_ctrlblk))
120 invoke_rcu_kthread(); 111 invoke_rcu_callbacks();
121 local_irq_restore(flags); 112 local_irq_restore(flags);
122} 113}
123 114
@@ -130,7 +121,7 @@ void rcu_bh_qs(int cpu)
130 121
131 local_irq_save(flags); 122 local_irq_save(flags);
132 if (rcu_qsctr_help(&rcu_bh_ctrlblk)) 123 if (rcu_qsctr_help(&rcu_bh_ctrlblk))
133 invoke_rcu_kthread(); 124 invoke_rcu_callbacks();
134 local_irq_restore(flags); 125 local_irq_restore(flags);
135} 126}
136 127
@@ -154,18 +145,23 @@ void rcu_check_callbacks(int cpu, int user)
154 * Invoke the RCU callbacks on the specified rcu_ctrlblk structure 145 * Invoke the RCU callbacks on the specified rcu_ctrlblk structure
155 * whose grace period has elapsed. 146 * whose grace period has elapsed.
156 */ 147 */
157static void rcu_process_callbacks(struct rcu_ctrlblk *rcp) 148static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp)
158{ 149{
150 char *rn = NULL;
159 struct rcu_head *next, *list; 151 struct rcu_head *next, *list;
160 unsigned long flags; 152 unsigned long flags;
161 RCU_TRACE(int cb_count = 0); 153 RCU_TRACE(int cb_count = 0);
162 154
163 /* If no RCU callbacks ready to invoke, just return. */ 155 /* If no RCU callbacks ready to invoke, just return. */
164 if (&rcp->rcucblist == rcp->donetail) 156 if (&rcp->rcucblist == rcp->donetail) {
157 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
158 RCU_TRACE(trace_rcu_batch_end(rcp->name, 0));
165 return; 159 return;
160 }
166 161
167 /* Move the ready-to-invoke callbacks to a local list. */ 162 /* Move the ready-to-invoke callbacks to a local list. */
168 local_irq_save(flags); 163 local_irq_save(flags);
164 RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, -1));
169 list = rcp->rcucblist; 165 list = rcp->rcucblist;
170 rcp->rcucblist = *rcp->donetail; 166 rcp->rcucblist = *rcp->donetail;
171 *rcp->donetail = NULL; 167 *rcp->donetail = NULL;
@@ -176,49 +172,26 @@ static void rcu_process_callbacks(struct rcu_ctrlblk *rcp)
176 local_irq_restore(flags); 172 local_irq_restore(flags);
177 173
178 /* Invoke the callbacks on the local list. */ 174 /* Invoke the callbacks on the local list. */
175 RCU_TRACE(rn = rcp->name);
179 while (list) { 176 while (list) {
180 next = list->next; 177 next = list->next;
181 prefetch(next); 178 prefetch(next);
182 debug_rcu_head_unqueue(list); 179 debug_rcu_head_unqueue(list);
183 local_bh_disable(); 180 local_bh_disable();
184 __rcu_reclaim(list); 181 __rcu_reclaim(rn, list);
185 local_bh_enable(); 182 local_bh_enable();
186 list = next; 183 list = next;
187 RCU_TRACE(cb_count++); 184 RCU_TRACE(cb_count++);
188 } 185 }
189 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); 186 RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count));
187 RCU_TRACE(trace_rcu_batch_end(rcp->name, cb_count));
190} 188}
191 189
192/* 190static void rcu_process_callbacks(struct softirq_action *unused)
193 * This kthread invokes RCU callbacks whose grace periods have
194 * elapsed. It is awakened as needed, and takes the place of the
195 * RCU_SOFTIRQ that was used previously for this purpose.
196 * This is a kthread, but it is never stopped, at least not until
197 * the system goes down.
198 */
199static int rcu_kthread(void *arg)
200{ 191{
201 unsigned long work; 192 __rcu_process_callbacks(&rcu_sched_ctrlblk);
202 unsigned long morework; 193 __rcu_process_callbacks(&rcu_bh_ctrlblk);
203 unsigned long flags; 194 rcu_preempt_process_callbacks();
204
205 for (;;) {
206 wait_event_interruptible(rcu_kthread_wq,
207 have_rcu_kthread_work != 0);
208 morework = rcu_boost();
209 local_irq_save(flags);
210 work = have_rcu_kthread_work;
211 have_rcu_kthread_work = morework;
212 local_irq_restore(flags);
213 if (work) {
214 rcu_process_callbacks(&rcu_sched_ctrlblk);
215 rcu_process_callbacks(&rcu_bh_ctrlblk);
216 rcu_preempt_process_callbacks();
217 }
218 schedule_timeout_interruptible(1); /* Leave CPU for others. */
219 }
220
221 return 0; /* Not reached, but needed to shut gcc up. */
222} 195}
223 196
224/* 197/*
@@ -280,45 +253,3 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
280 __call_rcu(head, func, &rcu_bh_ctrlblk); 253 __call_rcu(head, func, &rcu_bh_ctrlblk);
281} 254}
282EXPORT_SYMBOL_GPL(call_rcu_bh); 255EXPORT_SYMBOL_GPL(call_rcu_bh);
283
284void rcu_barrier_bh(void)
285{
286 struct rcu_synchronize rcu;
287
288 init_rcu_head_on_stack(&rcu.head);
289 init_completion(&rcu.completion);
290 /* Will wake me after RCU finished. */
291 call_rcu_bh(&rcu.head, wakeme_after_rcu);
292 /* Wait for it. */
293 wait_for_completion(&rcu.completion);
294 destroy_rcu_head_on_stack(&rcu.head);
295}
296EXPORT_SYMBOL_GPL(rcu_barrier_bh);
297
298void rcu_barrier_sched(void)
299{
300 struct rcu_synchronize rcu;
301
302 init_rcu_head_on_stack(&rcu.head);
303 init_completion(&rcu.completion);
304 /* Will wake me after RCU finished. */
305 call_rcu_sched(&rcu.head, wakeme_after_rcu);
306 /* Wait for it. */
307 wait_for_completion(&rcu.completion);
308 destroy_rcu_head_on_stack(&rcu.head);
309}
310EXPORT_SYMBOL_GPL(rcu_barrier_sched);
311
312/*
313 * Spawn the kthread that invokes RCU callbacks.
314 */
315static int __init rcu_spawn_kthreads(void)
316{
317 struct sched_param sp;
318
319 rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
320 sp.sched_priority = RCU_BOOST_PRIO;
321 sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
322 return 0;
323}
324early_initcall(rcu_spawn_kthreads);
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
index f259c676195f..02aa7139861c 100644
--- a/kernel/rcutiny_plugin.h
+++ b/kernel/rcutiny_plugin.h
@@ -26,29 +26,26 @@
26#include <linux/debugfs.h> 26#include <linux/debugfs.h>
27#include <linux/seq_file.h> 27#include <linux/seq_file.h>
28 28
29#ifdef CONFIG_RCU_TRACE
30#define RCU_TRACE(stmt) stmt
31#else /* #ifdef CONFIG_RCU_TRACE */
32#define RCU_TRACE(stmt)
33#endif /* #else #ifdef CONFIG_RCU_TRACE */
34
35/* Global control variables for rcupdate callback mechanism. */ 29/* Global control variables for rcupdate callback mechanism. */
36struct rcu_ctrlblk { 30struct rcu_ctrlblk {
37 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */ 31 struct rcu_head *rcucblist; /* List of pending callbacks (CBs). */
38 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */ 32 struct rcu_head **donetail; /* ->next pointer of last "done" CB. */
39 struct rcu_head **curtail; /* ->next pointer of last CB. */ 33 struct rcu_head **curtail; /* ->next pointer of last CB. */
40 RCU_TRACE(long qlen); /* Number of pending CBs. */ 34 RCU_TRACE(long qlen); /* Number of pending CBs. */
35 RCU_TRACE(char *name); /* Name of RCU type. */
41}; 36};
42 37
43/* Definition for rcupdate control block. */ 38/* Definition for rcupdate control block. */
44static struct rcu_ctrlblk rcu_sched_ctrlblk = { 39static struct rcu_ctrlblk rcu_sched_ctrlblk = {
45 .donetail = &rcu_sched_ctrlblk.rcucblist, 40 .donetail = &rcu_sched_ctrlblk.rcucblist,
46 .curtail = &rcu_sched_ctrlblk.rcucblist, 41 .curtail = &rcu_sched_ctrlblk.rcucblist,
42 RCU_TRACE(.name = "rcu_sched")
47}; 43};
48 44
49static struct rcu_ctrlblk rcu_bh_ctrlblk = { 45static struct rcu_ctrlblk rcu_bh_ctrlblk = {
50 .donetail = &rcu_bh_ctrlblk.rcucblist, 46 .donetail = &rcu_bh_ctrlblk.rcucblist,
51 .curtail = &rcu_bh_ctrlblk.rcucblist, 47 .curtail = &rcu_bh_ctrlblk.rcucblist,
48 RCU_TRACE(.name = "rcu_bh")
52}; 49};
53 50
54#ifdef CONFIG_DEBUG_LOCK_ALLOC 51#ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -131,6 +128,7 @@ static struct rcu_preempt_ctrlblk rcu_preempt_ctrlblk = {
131 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist, 128 .rcb.curtail = &rcu_preempt_ctrlblk.rcb.rcucblist,
132 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist, 129 .nexttail = &rcu_preempt_ctrlblk.rcb.rcucblist,
133 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks), 130 .blkd_tasks = LIST_HEAD_INIT(rcu_preempt_ctrlblk.blkd_tasks),
131 RCU_TRACE(.rcb.name = "rcu_preempt")
134}; 132};
135 133
136static int rcu_preempted_readers_exp(void); 134static int rcu_preempted_readers_exp(void);
@@ -247,6 +245,13 @@ static void show_tiny_preempt_stats(struct seq_file *m)
247 245
248#include "rtmutex_common.h" 246#include "rtmutex_common.h"
249 247
248#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
249
250/* Controls for rcu_kthread() kthread. */
251static struct task_struct *rcu_kthread_task;
252static DECLARE_WAIT_QUEUE_HEAD(rcu_kthread_wq);
253static unsigned long have_rcu_kthread_work;
254
250/* 255/*
251 * Carry out RCU priority boosting on the task indicated by ->boost_tasks, 256 * Carry out RCU priority boosting on the task indicated by ->boost_tasks,
252 * and advance ->boost_tasks to the next task in the ->blkd_tasks list. 257 * and advance ->boost_tasks to the next task in the ->blkd_tasks list.
@@ -334,7 +339,7 @@ static int rcu_initiate_boost(void)
334 if (rcu_preempt_ctrlblk.exp_tasks == NULL) 339 if (rcu_preempt_ctrlblk.exp_tasks == NULL)
335 rcu_preempt_ctrlblk.boost_tasks = 340 rcu_preempt_ctrlblk.boost_tasks =
336 rcu_preempt_ctrlblk.gp_tasks; 341 rcu_preempt_ctrlblk.gp_tasks;
337 invoke_rcu_kthread(); 342 invoke_rcu_callbacks();
338 } else 343 } else
339 RCU_TRACE(rcu_initiate_boost_trace()); 344 RCU_TRACE(rcu_initiate_boost_trace());
340 return 1; 345 return 1;
@@ -353,14 +358,6 @@ static void rcu_preempt_boost_start_gp(void)
353#else /* #ifdef CONFIG_RCU_BOOST */ 358#else /* #ifdef CONFIG_RCU_BOOST */
354 359
355/* 360/*
356 * If there is no RCU priority boosting, we don't boost.
357 */
358static int rcu_boost(void)
359{
360 return 0;
361}
362
363/*
364 * If there is no RCU priority boosting, we don't initiate boosting, 361 * If there is no RCU priority boosting, we don't initiate boosting,
365 * but we do indicate whether there are blocked readers blocking the 362 * but we do indicate whether there are blocked readers blocking the
366 * current grace period. 363 * current grace period.
@@ -427,7 +424,7 @@ static void rcu_preempt_cpu_qs(void)
427 424
428 /* If there are done callbacks, cause them to be invoked. */ 425 /* If there are done callbacks, cause them to be invoked. */
429 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL) 426 if (*rcu_preempt_ctrlblk.rcb.donetail != NULL)
430 invoke_rcu_kthread(); 427 invoke_rcu_callbacks();
431} 428}
432 429
433/* 430/*
@@ -648,7 +645,7 @@ static void rcu_preempt_check_callbacks(void)
648 rcu_preempt_cpu_qs(); 645 rcu_preempt_cpu_qs();
649 if (&rcu_preempt_ctrlblk.rcb.rcucblist != 646 if (&rcu_preempt_ctrlblk.rcb.rcucblist !=
650 rcu_preempt_ctrlblk.rcb.donetail) 647 rcu_preempt_ctrlblk.rcb.donetail)
651 invoke_rcu_kthread(); 648 invoke_rcu_callbacks();
652 if (rcu_preempt_gp_in_progress() && 649 if (rcu_preempt_gp_in_progress() &&
653 rcu_cpu_blocking_cur_gp() && 650 rcu_cpu_blocking_cur_gp() &&
654 rcu_preempt_running_reader()) 651 rcu_preempt_running_reader())
@@ -674,7 +671,7 @@ static void rcu_preempt_remove_callbacks(struct rcu_ctrlblk *rcp)
674 */ 671 */
675static void rcu_preempt_process_callbacks(void) 672static void rcu_preempt_process_callbacks(void)
676{ 673{
677 rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb); 674 __rcu_process_callbacks(&rcu_preempt_ctrlblk.rcb);
678} 675}
679 676
680/* 677/*
@@ -697,20 +694,6 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
697} 694}
698EXPORT_SYMBOL_GPL(call_rcu); 695EXPORT_SYMBOL_GPL(call_rcu);
699 696
700void rcu_barrier(void)
701{
702 struct rcu_synchronize rcu;
703
704 init_rcu_head_on_stack(&rcu.head);
705 init_completion(&rcu.completion);
706 /* Will wake me after RCU finished. */
707 call_rcu(&rcu.head, wakeme_after_rcu);
708 /* Wait for it. */
709 wait_for_completion(&rcu.completion);
710 destroy_rcu_head_on_stack(&rcu.head);
711}
712EXPORT_SYMBOL_GPL(rcu_barrier);
713
714/* 697/*
715 * synchronize_rcu - wait until a grace period has elapsed. 698 * synchronize_rcu - wait until a grace period has elapsed.
716 * 699 *
@@ -864,15 +847,6 @@ static void show_tiny_preempt_stats(struct seq_file *m)
864#endif /* #ifdef CONFIG_RCU_TRACE */ 847#endif /* #ifdef CONFIG_RCU_TRACE */
865 848
866/* 849/*
867 * Because preemptible RCU does not exist, it is never necessary to
868 * boost preempted RCU readers.
869 */
870static int rcu_boost(void)
871{
872 return 0;
873}
874
875/*
876 * Because preemptible RCU does not exist, it never has any callbacks 850 * Because preemptible RCU does not exist, it never has any callbacks
877 * to check. 851 * to check.
878 */ 852 */
@@ -898,6 +872,78 @@ static void rcu_preempt_process_callbacks(void)
898 872
899#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */ 873#endif /* #else #ifdef CONFIG_TINY_PREEMPT_RCU */
900 874
875#ifdef CONFIG_RCU_BOOST
876
877/*
878 * Wake up rcu_kthread() to process callbacks now eligible for invocation
879 * or to boost readers.
880 */
881static void invoke_rcu_callbacks(void)
882{
883 have_rcu_kthread_work = 1;
884 wake_up(&rcu_kthread_wq);
885}
886
887/*
888 * This kthread invokes RCU callbacks whose grace periods have
889 * elapsed. It is awakened as needed, and takes the place of the
890 * RCU_SOFTIRQ that is used for this purpose when boosting is disabled.
891 * This is a kthread, but it is never stopped, at least not until
892 * the system goes down.
893 */
894static int rcu_kthread(void *arg)
895{
896 unsigned long work;
897 unsigned long morework;
898 unsigned long flags;
899
900 for (;;) {
901 wait_event_interruptible(rcu_kthread_wq,
902 have_rcu_kthread_work != 0);
903 morework = rcu_boost();
904 local_irq_save(flags);
905 work = have_rcu_kthread_work;
906 have_rcu_kthread_work = morework;
907 local_irq_restore(flags);
908 if (work)
909 rcu_process_callbacks(NULL);
910 schedule_timeout_interruptible(1); /* Leave CPU for others. */
911 }
912
913 return 0; /* Not reached, but needed to shut gcc up. */
914}
915
916/*
917 * Spawn the kthread that invokes RCU callbacks.
918 */
919static int __init rcu_spawn_kthreads(void)
920{
921 struct sched_param sp;
922
923 rcu_kthread_task = kthread_run(rcu_kthread, NULL, "rcu_kthread");
924 sp.sched_priority = RCU_BOOST_PRIO;
925 sched_setscheduler_nocheck(rcu_kthread_task, SCHED_FIFO, &sp);
926 return 0;
927}
928early_initcall(rcu_spawn_kthreads);
929
930#else /* #ifdef CONFIG_RCU_BOOST */
931
932/*
933 * Start up softirq processing of callbacks.
934 */
935void invoke_rcu_callbacks(void)
936{
937 raise_softirq(RCU_SOFTIRQ);
938}
939
940void rcu_init(void)
941{
942 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
943}
944
945#endif /* #else #ifdef CONFIG_RCU_BOOST */
946
901#ifdef CONFIG_DEBUG_LOCK_ALLOC 947#ifdef CONFIG_DEBUG_LOCK_ALLOC
902#include <linux/kernel_stat.h> 948#include <linux/kernel_stat.h>
903 949
@@ -913,12 +959,6 @@ void __init rcu_scheduler_starting(void)
913 959
914#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ 960#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
915 961
916#ifdef CONFIG_RCU_BOOST
917#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
918#else /* #ifdef CONFIG_RCU_BOOST */
919#define RCU_BOOST_PRIO 1
920#endif /* #else #ifdef CONFIG_RCU_BOOST */
921
922#ifdef CONFIG_RCU_TRACE 962#ifdef CONFIG_RCU_TRACE
923 963
924#ifdef CONFIG_RCU_BOOST 964#ifdef CONFIG_RCU_BOOST
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 98f51b13bb7e..764825c2685c 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -73,7 +73,7 @@ module_param(nreaders, int, 0444);
73MODULE_PARM_DESC(nreaders, "Number of RCU reader threads"); 73MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
74module_param(nfakewriters, int, 0444); 74module_param(nfakewriters, int, 0444);
75MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads"); 75MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
76module_param(stat_interval, int, 0444); 76module_param(stat_interval, int, 0644);
77MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s"); 77MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
78module_param(verbose, bool, 0444); 78module_param(verbose, bool, 0444);
79MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s"); 79MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
@@ -480,30 +480,6 @@ static void rcu_bh_torture_deferred_free(struct rcu_torture *p)
480 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb); 480 call_rcu_bh(&p->rtort_rcu, rcu_torture_cb);
481} 481}
482 482
483struct rcu_bh_torture_synchronize {
484 struct rcu_head head;
485 struct completion completion;
486};
487
488static void rcu_bh_torture_wakeme_after_cb(struct rcu_head *head)
489{
490 struct rcu_bh_torture_synchronize *rcu;
491
492 rcu = container_of(head, struct rcu_bh_torture_synchronize, head);
493 complete(&rcu->completion);
494}
495
496static void rcu_bh_torture_synchronize(void)
497{
498 struct rcu_bh_torture_synchronize rcu;
499
500 init_rcu_head_on_stack(&rcu.head);
501 init_completion(&rcu.completion);
502 call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
503 wait_for_completion(&rcu.completion);
504 destroy_rcu_head_on_stack(&rcu.head);
505}
506
507static struct rcu_torture_ops rcu_bh_ops = { 483static struct rcu_torture_ops rcu_bh_ops = {
508 .init = NULL, 484 .init = NULL,
509 .cleanup = NULL, 485 .cleanup = NULL,
@@ -512,7 +488,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
512 .readunlock = rcu_bh_torture_read_unlock, 488 .readunlock = rcu_bh_torture_read_unlock,
513 .completed = rcu_bh_torture_completed, 489 .completed = rcu_bh_torture_completed,
514 .deferred_free = rcu_bh_torture_deferred_free, 490 .deferred_free = rcu_bh_torture_deferred_free,
515 .sync = rcu_bh_torture_synchronize, 491 .sync = synchronize_rcu_bh,
516 .cb_barrier = rcu_barrier_bh, 492 .cb_barrier = rcu_barrier_bh,
517 .fqs = rcu_bh_force_quiescent_state, 493 .fqs = rcu_bh_force_quiescent_state,
518 .stats = NULL, 494 .stats = NULL,
@@ -528,7 +504,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
528 .readunlock = rcu_bh_torture_read_unlock, 504 .readunlock = rcu_bh_torture_read_unlock,
529 .completed = rcu_bh_torture_completed, 505 .completed = rcu_bh_torture_completed,
530 .deferred_free = rcu_sync_torture_deferred_free, 506 .deferred_free = rcu_sync_torture_deferred_free,
531 .sync = rcu_bh_torture_synchronize, 507 .sync = synchronize_rcu_bh,
532 .cb_barrier = NULL, 508 .cb_barrier = NULL,
533 .fqs = rcu_bh_force_quiescent_state, 509 .fqs = rcu_bh_force_quiescent_state,
534 .stats = NULL, 510 .stats = NULL,
@@ -536,6 +512,22 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
536 .name = "rcu_bh_sync" 512 .name = "rcu_bh_sync"
537}; 513};
538 514
515static struct rcu_torture_ops rcu_bh_expedited_ops = {
516 .init = rcu_sync_torture_init,
517 .cleanup = NULL,
518 .readlock = rcu_bh_torture_read_lock,
519 .read_delay = rcu_read_delay, /* just reuse rcu's version. */
520 .readunlock = rcu_bh_torture_read_unlock,
521 .completed = rcu_bh_torture_completed,
522 .deferred_free = rcu_sync_torture_deferred_free,
523 .sync = synchronize_rcu_bh_expedited,
524 .cb_barrier = NULL,
525 .fqs = rcu_bh_force_quiescent_state,
526 .stats = NULL,
527 .irq_capable = 1,
528 .name = "rcu_bh_expedited"
529};
530
539/* 531/*
540 * Definitions for srcu torture testing. 532 * Definitions for srcu torture testing.
541 */ 533 */
@@ -659,11 +651,6 @@ static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
659 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb); 651 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
660} 652}
661 653
662static void sched_torture_synchronize(void)
663{
664 synchronize_sched();
665}
666
667static struct rcu_torture_ops sched_ops = { 654static struct rcu_torture_ops sched_ops = {
668 .init = rcu_sync_torture_init, 655 .init = rcu_sync_torture_init,
669 .cleanup = NULL, 656 .cleanup = NULL,
@@ -672,7 +659,7 @@ static struct rcu_torture_ops sched_ops = {
672 .readunlock = sched_torture_read_unlock, 659 .readunlock = sched_torture_read_unlock,
673 .completed = rcu_no_completed, 660 .completed = rcu_no_completed,
674 .deferred_free = rcu_sched_torture_deferred_free, 661 .deferred_free = rcu_sched_torture_deferred_free,
675 .sync = sched_torture_synchronize, 662 .sync = synchronize_sched,
676 .cb_barrier = rcu_barrier_sched, 663 .cb_barrier = rcu_barrier_sched,
677 .fqs = rcu_sched_force_quiescent_state, 664 .fqs = rcu_sched_force_quiescent_state,
678 .stats = NULL, 665 .stats = NULL,
@@ -688,7 +675,7 @@ static struct rcu_torture_ops sched_sync_ops = {
688 .readunlock = sched_torture_read_unlock, 675 .readunlock = sched_torture_read_unlock,
689 .completed = rcu_no_completed, 676 .completed = rcu_no_completed,
690 .deferred_free = rcu_sync_torture_deferred_free, 677 .deferred_free = rcu_sync_torture_deferred_free,
691 .sync = sched_torture_synchronize, 678 .sync = synchronize_sched,
692 .cb_barrier = NULL, 679 .cb_barrier = NULL,
693 .fqs = rcu_sched_force_quiescent_state, 680 .fqs = rcu_sched_force_quiescent_state,
694 .stats = NULL, 681 .stats = NULL,
@@ -754,7 +741,7 @@ static int rcu_torture_boost(void *arg)
754 do { 741 do {
755 /* Wait for the next test interval. */ 742 /* Wait for the next test interval. */
756 oldstarttime = boost_starttime; 743 oldstarttime = boost_starttime;
757 while (jiffies - oldstarttime > ULONG_MAX / 2) { 744 while (ULONG_CMP_LT(jiffies, oldstarttime)) {
758 schedule_timeout_uninterruptible(1); 745 schedule_timeout_uninterruptible(1);
759 rcu_stutter_wait("rcu_torture_boost"); 746 rcu_stutter_wait("rcu_torture_boost");
760 if (kthread_should_stop() || 747 if (kthread_should_stop() ||
@@ -765,7 +752,7 @@ static int rcu_torture_boost(void *arg)
765 /* Do one boost-test interval. */ 752 /* Do one boost-test interval. */
766 endtime = oldstarttime + test_boost_duration * HZ; 753 endtime = oldstarttime + test_boost_duration * HZ;
767 call_rcu_time = jiffies; 754 call_rcu_time = jiffies;
768 while (jiffies - endtime > ULONG_MAX / 2) { 755 while (ULONG_CMP_LT(jiffies, endtime)) {
769 /* If we don't have a callback in flight, post one. */ 756 /* If we don't have a callback in flight, post one. */
770 if (!rbi.inflight) { 757 if (!rbi.inflight) {
771 smp_mb(); /* RCU core before ->inflight = 1. */ 758 smp_mb(); /* RCU core before ->inflight = 1. */
@@ -792,7 +779,8 @@ static int rcu_torture_boost(void *arg)
792 * interval. Besides, we are running at RT priority, 779 * interval. Besides, we are running at RT priority,
793 * so delays should be relatively rare. 780 * so delays should be relatively rare.
794 */ 781 */
795 while (oldstarttime == boost_starttime) { 782 while (oldstarttime == boost_starttime &&
783 !kthread_should_stop()) {
796 if (mutex_trylock(&boost_mutex)) { 784 if (mutex_trylock(&boost_mutex)) {
797 boost_starttime = jiffies + 785 boost_starttime = jiffies +
798 test_boost_interval * HZ; 786 test_boost_interval * HZ;
@@ -809,11 +797,11 @@ checkwait: rcu_stutter_wait("rcu_torture_boost");
809 797
810 /* Clean up and exit. */ 798 /* Clean up and exit. */
811 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping"); 799 VERBOSE_PRINTK_STRING("rcu_torture_boost task stopping");
812 destroy_rcu_head_on_stack(&rbi.rcu);
813 rcutorture_shutdown_absorb("rcu_torture_boost"); 800 rcutorture_shutdown_absorb("rcu_torture_boost");
814 while (!kthread_should_stop() || rbi.inflight) 801 while (!kthread_should_stop() || rbi.inflight)
815 schedule_timeout_uninterruptible(1); 802 schedule_timeout_uninterruptible(1);
816 smp_mb(); /* order accesses to ->inflight before stack-frame death. */ 803 smp_mb(); /* order accesses to ->inflight before stack-frame death. */
804 destroy_rcu_head_on_stack(&rbi.rcu);
817 return 0; 805 return 0;
818} 806}
819 807
@@ -831,11 +819,13 @@ rcu_torture_fqs(void *arg)
831 VERBOSE_PRINTK_STRING("rcu_torture_fqs task started"); 819 VERBOSE_PRINTK_STRING("rcu_torture_fqs task started");
832 do { 820 do {
833 fqs_resume_time = jiffies + fqs_stutter * HZ; 821 fqs_resume_time = jiffies + fqs_stutter * HZ;
834 while (jiffies - fqs_resume_time > LONG_MAX) { 822 while (ULONG_CMP_LT(jiffies, fqs_resume_time) &&
823 !kthread_should_stop()) {
835 schedule_timeout_interruptible(1); 824 schedule_timeout_interruptible(1);
836 } 825 }
837 fqs_burst_remaining = fqs_duration; 826 fqs_burst_remaining = fqs_duration;
838 while (fqs_burst_remaining > 0) { 827 while (fqs_burst_remaining > 0 &&
828 !kthread_should_stop()) {
839 cur_ops->fqs(); 829 cur_ops->fqs();
840 udelay(fqs_holdoff); 830 udelay(fqs_holdoff);
841 fqs_burst_remaining -= fqs_holdoff; 831 fqs_burst_remaining -= fqs_holdoff;
@@ -1280,8 +1270,9 @@ static int rcutorture_booster_init(int cpu)
1280 /* Don't allow time recalculation while creating a new task. */ 1270 /* Don't allow time recalculation while creating a new task. */
1281 mutex_lock(&boost_mutex); 1271 mutex_lock(&boost_mutex);
1282 VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task"); 1272 VERBOSE_PRINTK_STRING("Creating rcu_torture_boost task");
1283 boost_tasks[cpu] = kthread_create(rcu_torture_boost, NULL, 1273 boost_tasks[cpu] = kthread_create_on_node(rcu_torture_boost, NULL,
1284 "rcu_torture_boost"); 1274 cpu_to_node(cpu),
1275 "rcu_torture_boost");
1285 if (IS_ERR(boost_tasks[cpu])) { 1276 if (IS_ERR(boost_tasks[cpu])) {
1286 retval = PTR_ERR(boost_tasks[cpu]); 1277 retval = PTR_ERR(boost_tasks[cpu]);
1287 VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed"); 1278 VERBOSE_PRINTK_STRING("rcu_torture_boost task create failed");
@@ -1424,7 +1415,7 @@ rcu_torture_init(void)
1424 int firsterr = 0; 1415 int firsterr = 0;
1425 static struct rcu_torture_ops *torture_ops[] = 1416 static struct rcu_torture_ops *torture_ops[] =
1426 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops, 1417 { &rcu_ops, &rcu_sync_ops, &rcu_expedited_ops,
1427 &rcu_bh_ops, &rcu_bh_sync_ops, 1418 &rcu_bh_ops, &rcu_bh_sync_ops, &rcu_bh_expedited_ops,
1428 &srcu_ops, &srcu_expedited_ops, 1419 &srcu_ops, &srcu_expedited_ops,
1429 &sched_ops, &sched_sync_ops, &sched_expedited_ops, }; 1420 &sched_ops, &sched_sync_ops, &sched_expedited_ops, };
1430 1421
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ba06207b1dd3..e234eb92a177 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -52,13 +52,16 @@
52#include <linux/prefetch.h> 52#include <linux/prefetch.h>
53 53
54#include "rcutree.h" 54#include "rcutree.h"
55#include <trace/events/rcu.h>
56
57#include "rcu.h"
55 58
56/* Data structures. */ 59/* Data structures. */
57 60
58static struct lock_class_key rcu_node_class[NUM_RCU_LVLS]; 61static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
59 62
60#define RCU_STATE_INITIALIZER(structname) { \ 63#define RCU_STATE_INITIALIZER(structname) { \
61 .level = { &structname.node[0] }, \ 64 .level = { &structname##_state.node[0] }, \
62 .levelcnt = { \ 65 .levelcnt = { \
63 NUM_RCU_LVL_0, /* root of hierarchy. */ \ 66 NUM_RCU_LVL_0, /* root of hierarchy. */ \
64 NUM_RCU_LVL_1, \ 67 NUM_RCU_LVL_1, \
@@ -69,17 +72,17 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
69 .signaled = RCU_GP_IDLE, \ 72 .signaled = RCU_GP_IDLE, \
70 .gpnum = -300, \ 73 .gpnum = -300, \
71 .completed = -300, \ 74 .completed = -300, \
72 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname.onofflock), \ 75 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
73 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname.fqslock), \ 76 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.fqslock), \
74 .n_force_qs = 0, \ 77 .n_force_qs = 0, \
75 .n_force_qs_ngp = 0, \ 78 .n_force_qs_ngp = 0, \
76 .name = #structname, \ 79 .name = #structname, \
77} 80}
78 81
79struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched_state); 82struct rcu_state rcu_sched_state = RCU_STATE_INITIALIZER(rcu_sched);
80DEFINE_PER_CPU(struct rcu_data, rcu_sched_data); 83DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
81 84
82struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 85struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh);
83DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 86DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
84 87
85static struct rcu_state *rcu_state; 88static struct rcu_state *rcu_state;
@@ -128,8 +131,6 @@ static void rcu_node_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu);
128static void invoke_rcu_core(void); 131static void invoke_rcu_core(void);
129static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp); 132static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp);
130 133
131#define RCU_KTHREAD_PRIO 1 /* RT priority for per-CPU kthreads. */
132
133/* 134/*
134 * Track the rcutorture test sequence number and the update version 135 * Track the rcutorture test sequence number and the update version
135 * number within a given test. The rcutorture_testseq is incremented 136 * number within a given test. The rcutorture_testseq is incremented
@@ -156,33 +157,41 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
156 * Note a quiescent state. Because we do not need to know 157 * Note a quiescent state. Because we do not need to know
157 * how many quiescent states passed, just if there was at least 158 * how many quiescent states passed, just if there was at least
158 * one since the start of the grace period, this just sets a flag. 159 * one since the start of the grace period, this just sets a flag.
160 * The caller must have disabled preemption.
159 */ 161 */
160void rcu_sched_qs(int cpu) 162void rcu_sched_qs(int cpu)
161{ 163{
162 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); 164 struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
163 165
164 rdp->passed_quiesc_completed = rdp->gpnum - 1; 166 rdp->passed_quiesce_gpnum = rdp->gpnum;
165 barrier(); 167 barrier();
166 rdp->passed_quiesc = 1; 168 if (rdp->passed_quiesce == 0)
169 trace_rcu_grace_period("rcu_sched", rdp->gpnum, "cpuqs");
170 rdp->passed_quiesce = 1;
167} 171}
168 172
169void rcu_bh_qs(int cpu) 173void rcu_bh_qs(int cpu)
170{ 174{
171 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); 175 struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
172 176
173 rdp->passed_quiesc_completed = rdp->gpnum - 1; 177 rdp->passed_quiesce_gpnum = rdp->gpnum;
174 barrier(); 178 barrier();
175 rdp->passed_quiesc = 1; 179 if (rdp->passed_quiesce == 0)
180 trace_rcu_grace_period("rcu_bh", rdp->gpnum, "cpuqs");
181 rdp->passed_quiesce = 1;
176} 182}
177 183
178/* 184/*
179 * Note a context switch. This is a quiescent state for RCU-sched, 185 * Note a context switch. This is a quiescent state for RCU-sched,
180 * and requires special handling for preemptible RCU. 186 * and requires special handling for preemptible RCU.
187 * The caller must have disabled preemption.
181 */ 188 */
182void rcu_note_context_switch(int cpu) 189void rcu_note_context_switch(int cpu)
183{ 190{
191 trace_rcu_utilization("Start context switch");
184 rcu_sched_qs(cpu); 192 rcu_sched_qs(cpu);
185 rcu_preempt_note_context_switch(cpu); 193 rcu_preempt_note_context_switch(cpu);
194 trace_rcu_utilization("End context switch");
186} 195}
187EXPORT_SYMBOL_GPL(rcu_note_context_switch); 196EXPORT_SYMBOL_GPL(rcu_note_context_switch);
188 197
@@ -193,7 +202,7 @@ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
193}; 202};
194#endif /* #ifdef CONFIG_NO_HZ */ 203#endif /* #ifdef CONFIG_NO_HZ */
195 204
196static int blimit = 10; /* Maximum callbacks per softirq. */ 205static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
197static int qhimark = 10000; /* If this many pending, ignore blimit. */ 206static int qhimark = 10000; /* If this many pending, ignore blimit. */
198static int qlowmark = 100; /* Once only this many pending, use blimit. */ 207static int qlowmark = 100; /* Once only this many pending, use blimit. */
199 208
@@ -314,6 +323,7 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
314 * trust its state not to change because interrupts are disabled. 323 * trust its state not to change because interrupts are disabled.
315 */ 324 */
316 if (cpu_is_offline(rdp->cpu)) { 325 if (cpu_is_offline(rdp->cpu)) {
326 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
317 rdp->offline_fqs++; 327 rdp->offline_fqs++;
318 return 1; 328 return 1;
319 } 329 }
@@ -354,19 +364,13 @@ void rcu_enter_nohz(void)
354 local_irq_restore(flags); 364 local_irq_restore(flags);
355 return; 365 return;
356 } 366 }
367 trace_rcu_dyntick("Start");
357 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ 368 /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
358 smp_mb__before_atomic_inc(); /* See above. */ 369 smp_mb__before_atomic_inc(); /* See above. */
359 atomic_inc(&rdtp->dynticks); 370 atomic_inc(&rdtp->dynticks);
360 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ 371 smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
361 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); 372 WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
362 local_irq_restore(flags); 373 local_irq_restore(flags);
363
364 /* If the interrupt queued a callback, get out of dyntick mode. */
365 if (in_irq() &&
366 (__get_cpu_var(rcu_sched_data).nxtlist ||
367 __get_cpu_var(rcu_bh_data).nxtlist ||
368 rcu_preempt_needs_cpu(smp_processor_id())))
369 set_need_resched();
370} 374}
371 375
372/* 376/*
@@ -391,6 +395,7 @@ void rcu_exit_nohz(void)
391 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ 395 /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
392 smp_mb__after_atomic_inc(); /* See above. */ 396 smp_mb__after_atomic_inc(); /* See above. */
393 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); 397 WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
398 trace_rcu_dyntick("End");
394 local_irq_restore(flags); 399 local_irq_restore(flags);
395} 400}
396 401
@@ -481,11 +486,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp)
481 */ 486 */
482static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) 487static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
483{ 488{
484 unsigned long curr; 489 unsigned int curr;
485 unsigned long snap; 490 unsigned int snap;
486 491
487 curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); 492 curr = (unsigned int)atomic_add_return(0, &rdp->dynticks->dynticks);
488 snap = (unsigned long)rdp->dynticks_snap; 493 snap = (unsigned int)rdp->dynticks_snap;
489 494
490 /* 495 /*
491 * If the CPU passed through or entered a dynticks idle phase with 496 * If the CPU passed through or entered a dynticks idle phase with
@@ -495,7 +500,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
495 * read-side critical section that started before the beginning 500 * read-side critical section that started before the beginning
496 * of the current RCU grace period. 501 * of the current RCU grace period.
497 */ 502 */
498 if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { 503 if ((curr & 0x1) == 0 || UINT_CMP_GE(curr, snap + 2)) {
504 trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "dti");
499 rdp->dynticks_fqs++; 505 rdp->dynticks_fqs++;
500 return 1; 506 return 1;
501 } 507 }
@@ -537,6 +543,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
537 int cpu; 543 int cpu;
538 long delta; 544 long delta;
539 unsigned long flags; 545 unsigned long flags;
546 int ndetected;
540 struct rcu_node *rnp = rcu_get_root(rsp); 547 struct rcu_node *rnp = rcu_get_root(rsp);
541 548
542 /* Only let one CPU complain about others per time interval. */ 549 /* Only let one CPU complain about others per time interval. */
@@ -553,7 +560,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
553 * Now rat on any tasks that got kicked up to the root rcu_node 560 * Now rat on any tasks that got kicked up to the root rcu_node
554 * due to CPU offlining. 561 * due to CPU offlining.
555 */ 562 */
556 rcu_print_task_stall(rnp); 563 ndetected = rcu_print_task_stall(rnp);
557 raw_spin_unlock_irqrestore(&rnp->lock, flags); 564 raw_spin_unlock_irqrestore(&rnp->lock, flags);
558 565
559 /* 566 /*
@@ -565,17 +572,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
565 rsp->name); 572 rsp->name);
566 rcu_for_each_leaf_node(rsp, rnp) { 573 rcu_for_each_leaf_node(rsp, rnp) {
567 raw_spin_lock_irqsave(&rnp->lock, flags); 574 raw_spin_lock_irqsave(&rnp->lock, flags);
568 rcu_print_task_stall(rnp); 575 ndetected += rcu_print_task_stall(rnp);
569 raw_spin_unlock_irqrestore(&rnp->lock, flags); 576 raw_spin_unlock_irqrestore(&rnp->lock, flags);
570 if (rnp->qsmask == 0) 577 if (rnp->qsmask == 0)
571 continue; 578 continue;
572 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 579 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
573 if (rnp->qsmask & (1UL << cpu)) 580 if (rnp->qsmask & (1UL << cpu)) {
574 printk(" %d", rnp->grplo + cpu); 581 printk(" %d", rnp->grplo + cpu);
582 ndetected++;
583 }
575 } 584 }
576 printk("} (detected by %d, t=%ld jiffies)\n", 585 printk("} (detected by %d, t=%ld jiffies)\n",
577 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 586 smp_processor_id(), (long)(jiffies - rsp->gp_start));
578 trigger_all_cpu_backtrace(); 587 if (ndetected == 0)
588 printk(KERN_ERR "INFO: Stall ended before state dump start\n");
589 else if (!trigger_all_cpu_backtrace())
590 dump_stack();
579 591
580 /* If so configured, complain about tasks blocking the grace period. */ 592 /* If so configured, complain about tasks blocking the grace period. */
581 593
@@ -596,7 +608,8 @@ static void print_cpu_stall(struct rcu_state *rsp)
596 */ 608 */
597 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n", 609 printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
598 rsp->name, smp_processor_id(), jiffies - rsp->gp_start); 610 rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
599 trigger_all_cpu_backtrace(); 611 if (!trigger_all_cpu_backtrace())
612 dump_stack();
600 613
601 raw_spin_lock_irqsave(&rnp->lock, flags); 614 raw_spin_lock_irqsave(&rnp->lock, flags);
602 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall)) 615 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
@@ -678,9 +691,10 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
678 * go looking for one. 691 * go looking for one.
679 */ 692 */
680 rdp->gpnum = rnp->gpnum; 693 rdp->gpnum = rnp->gpnum;
694 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpustart");
681 if (rnp->qsmask & rdp->grpmask) { 695 if (rnp->qsmask & rdp->grpmask) {
682 rdp->qs_pending = 1; 696 rdp->qs_pending = 1;
683 rdp->passed_quiesc = 0; 697 rdp->passed_quiesce = 0;
684 } else 698 } else
685 rdp->qs_pending = 0; 699 rdp->qs_pending = 0;
686 } 700 }
@@ -741,6 +755,7 @@ __rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
741 755
742 /* Remember that we saw this grace-period completion. */ 756 /* Remember that we saw this grace-period completion. */
743 rdp->completed = rnp->completed; 757 rdp->completed = rnp->completed;
758 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuend");
744 759
745 /* 760 /*
746 * If we were in an extended quiescent state, we may have 761 * If we were in an extended quiescent state, we may have
@@ -826,31 +841,31 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
826 struct rcu_data *rdp = this_cpu_ptr(rsp->rda); 841 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
827 struct rcu_node *rnp = rcu_get_root(rsp); 842 struct rcu_node *rnp = rcu_get_root(rsp);
828 843
829 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) { 844 if (!rcu_scheduler_fully_active ||
830 if (cpu_needs_another_gp(rsp, rdp)) 845 !cpu_needs_another_gp(rsp, rdp)) {
831 rsp->fqs_need_gp = 1; 846 /*
832 if (rnp->completed == rsp->completed) { 847 * Either the scheduler hasn't yet spawned the first
833 raw_spin_unlock_irqrestore(&rnp->lock, flags); 848 * non-idle task or this CPU does not need another
834 return; 849 * grace period. Either way, don't start a new grace
835 } 850 * period.
836 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 851 */
852 raw_spin_unlock_irqrestore(&rnp->lock, flags);
853 return;
854 }
837 855
856 if (rsp->fqs_active) {
838 /* 857 /*
839 * Propagate new ->completed value to rcu_node structures 858 * This CPU needs a grace period, but force_quiescent_state()
840 * so that other CPUs don't have to wait until the start 859 * is running. Tell it to start one on this CPU's behalf.
841 * of the next grace period to process their callbacks.
842 */ 860 */
843 rcu_for_each_node_breadth_first(rsp, rnp) { 861 rsp->fqs_need_gp = 1;
844 raw_spin_lock(&rnp->lock); /* irqs already disabled. */ 862 raw_spin_unlock_irqrestore(&rnp->lock, flags);
845 rnp->completed = rsp->completed;
846 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
847 }
848 local_irq_restore(flags);
849 return; 863 return;
850 } 864 }
851 865
852 /* Advance to a new grace period and initialize state. */ 866 /* Advance to a new grace period and initialize state. */
853 rsp->gpnum++; 867 rsp->gpnum++;
868 trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
854 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 869 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
855 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 870 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
856 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 871 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
@@ -865,6 +880,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
865 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 880 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
866 rcu_start_gp_per_cpu(rsp, rnp, rdp); 881 rcu_start_gp_per_cpu(rsp, rnp, rdp);
867 rcu_preempt_boost_start_gp(rnp); 882 rcu_preempt_boost_start_gp(rnp);
883 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
884 rnp->level, rnp->grplo,
885 rnp->grphi, rnp->qsmask);
868 raw_spin_unlock_irqrestore(&rnp->lock, flags); 886 raw_spin_unlock_irqrestore(&rnp->lock, flags);
869 return; 887 return;
870 } 888 }
@@ -901,6 +919,9 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
901 if (rnp == rdp->mynode) 919 if (rnp == rdp->mynode)
902 rcu_start_gp_per_cpu(rsp, rnp, rdp); 920 rcu_start_gp_per_cpu(rsp, rnp, rdp);
903 rcu_preempt_boost_start_gp(rnp); 921 rcu_preempt_boost_start_gp(rnp);
922 trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
923 rnp->level, rnp->grplo,
924 rnp->grphi, rnp->qsmask);
904 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 925 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
905 } 926 }
906 927
@@ -922,6 +943,8 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
922 __releases(rcu_get_root(rsp)->lock) 943 __releases(rcu_get_root(rsp)->lock)
923{ 944{
924 unsigned long gp_duration; 945 unsigned long gp_duration;
946 struct rcu_node *rnp = rcu_get_root(rsp);
947 struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
925 948
926 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 949 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
927 950
@@ -933,7 +956,41 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
933 gp_duration = jiffies - rsp->gp_start; 956 gp_duration = jiffies - rsp->gp_start;
934 if (gp_duration > rsp->gp_max) 957 if (gp_duration > rsp->gp_max)
935 rsp->gp_max = gp_duration; 958 rsp->gp_max = gp_duration;
936 rsp->completed = rsp->gpnum; 959
960 /*
961 * We know the grace period is complete, but to everyone else
962 * it appears to still be ongoing. But it is also the case
963 * that to everyone else it looks like there is nothing that
964 * they can do to advance the grace period. It is therefore
965 * safe for us to drop the lock in order to mark the grace
966 * period as completed in all of the rcu_node structures.
967 *
968 * But if this CPU needs another grace period, it will take
969 * care of this while initializing the next grace period.
970 * We use RCU_WAIT_TAIL instead of the usual RCU_DONE_TAIL
971 * because the callbacks have not yet been advanced: Those
972 * callbacks are waiting on the grace period that just now
973 * completed.
974 */
975 if (*rdp->nxttail[RCU_WAIT_TAIL] == NULL) {
976 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
977
978 /*
979 * Propagate new ->completed value to rcu_node structures
980 * so that other CPUs don't have to wait until the start
981 * of the next grace period to process their callbacks.
982 */
983 rcu_for_each_node_breadth_first(rsp, rnp) {
984 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
985 rnp->completed = rsp->gpnum;
986 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
987 }
988 rnp = rcu_get_root(rsp);
989 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
990 }
991
992 rsp->completed = rsp->gpnum; /* Declare the grace period complete. */
993 trace_rcu_grace_period(rsp->name, rsp->completed, "end");
937 rsp->signaled = RCU_GP_IDLE; 994 rsp->signaled = RCU_GP_IDLE;
938 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 995 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
939} 996}
@@ -962,6 +1019,10 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
962 return; 1019 return;
963 } 1020 }
964 rnp->qsmask &= ~mask; 1021 rnp->qsmask &= ~mask;
1022 trace_rcu_quiescent_state_report(rsp->name, rnp->gpnum,
1023 mask, rnp->qsmask, rnp->level,
1024 rnp->grplo, rnp->grphi,
1025 !!rnp->gp_tasks);
965 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) { 1026 if (rnp->qsmask != 0 || rcu_preempt_blocked_readers_cgp(rnp)) {
966 1027
967 /* Other bits still set at this level, so done. */ 1028 /* Other bits still set at this level, so done. */
@@ -1000,7 +1061,7 @@ rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
1000 * based on quiescent states detected in an earlier grace period! 1061 * based on quiescent states detected in an earlier grace period!
1001 */ 1062 */
1002static void 1063static void
1003rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) 1064rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastgp)
1004{ 1065{
1005 unsigned long flags; 1066 unsigned long flags;
1006 unsigned long mask; 1067 unsigned long mask;
@@ -1008,17 +1069,15 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long las
1008 1069
1009 rnp = rdp->mynode; 1070 rnp = rdp->mynode;
1010 raw_spin_lock_irqsave(&rnp->lock, flags); 1071 raw_spin_lock_irqsave(&rnp->lock, flags);
1011 if (lastcomp != rnp->completed) { 1072 if (lastgp != rnp->gpnum || rnp->completed == rnp->gpnum) {
1012 1073
1013 /* 1074 /*
1014 * Someone beat us to it for this grace period, so leave. 1075 * The grace period in which this quiescent state was
1015 * The race with GP start is resolved by the fact that we 1076 * recorded has ended, so don't report it upwards.
1016 * hold the leaf rcu_node lock, so that the per-CPU bits 1077 * We will instead need a new quiescent state that lies
1017 * cannot yet be initialized -- so we would simply find our 1078 * within the current grace period.
1018 * CPU's bit already cleared in rcu_report_qs_rnp() if this
1019 * race occurred.
1020 */ 1079 */
1021 rdp->passed_quiesc = 0; /* try again later! */ 1080 rdp->passed_quiesce = 0; /* need qs for new gp. */
1022 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1081 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1023 return; 1082 return;
1024 } 1083 }
@@ -1062,14 +1121,14 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
1062 * Was there a quiescent state since the beginning of the grace 1121 * Was there a quiescent state since the beginning of the grace
1063 * period? If no, then exit and wait for the next call. 1122 * period? If no, then exit and wait for the next call.
1064 */ 1123 */
1065 if (!rdp->passed_quiesc) 1124 if (!rdp->passed_quiesce)
1066 return; 1125 return;
1067 1126
1068 /* 1127 /*
1069 * Tell RCU we are done (but rcu_report_qs_rdp() will be the 1128 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
1070 * judge of that). 1129 * judge of that).
1071 */ 1130 */
1072 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); 1131 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesce_gpnum);
1073} 1132}
1074 1133
1075#ifdef CONFIG_HOTPLUG_CPU 1134#ifdef CONFIG_HOTPLUG_CPU
@@ -1130,11 +1189,20 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
1130 if (rnp->qsmaskinit != 0) { 1189 if (rnp->qsmaskinit != 0) {
1131 if (rnp != rdp->mynode) 1190 if (rnp != rdp->mynode)
1132 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1191 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1192 else
1193 trace_rcu_grace_period(rsp->name,
1194 rnp->gpnum + 1 -
1195 !!(rnp->qsmask & mask),
1196 "cpuofl");
1133 break; 1197 break;
1134 } 1198 }
1135 if (rnp == rdp->mynode) 1199 if (rnp == rdp->mynode) {
1200 trace_rcu_grace_period(rsp->name,
1201 rnp->gpnum + 1 -
1202 !!(rnp->qsmask & mask),
1203 "cpuofl");
1136 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp); 1204 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
1137 else 1205 } else
1138 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1206 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1139 mask = rnp->grpmask; 1207 mask = rnp->grpmask;
1140 rnp = rnp->parent; 1208 rnp = rnp->parent;
@@ -1190,17 +1258,22 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1190{ 1258{
1191 unsigned long flags; 1259 unsigned long flags;
1192 struct rcu_head *next, *list, **tail; 1260 struct rcu_head *next, *list, **tail;
1193 int count; 1261 int bl, count;
1194 1262
1195 /* If no callbacks are ready, just return.*/ 1263 /* If no callbacks are ready, just return.*/
1196 if (!cpu_has_callbacks_ready_to_invoke(rdp)) 1264 if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
1265 trace_rcu_batch_start(rsp->name, 0, 0);
1266 trace_rcu_batch_end(rsp->name, 0);
1197 return; 1267 return;
1268 }
1198 1269
1199 /* 1270 /*
1200 * Extract the list of ready callbacks, disabling to prevent 1271 * Extract the list of ready callbacks, disabling to prevent
1201 * races with call_rcu() from interrupt handlers. 1272 * races with call_rcu() from interrupt handlers.
1202 */ 1273 */
1203 local_irq_save(flags); 1274 local_irq_save(flags);
1275 bl = rdp->blimit;
1276 trace_rcu_batch_start(rsp->name, rdp->qlen, bl);
1204 list = rdp->nxtlist; 1277 list = rdp->nxtlist;
1205 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL]; 1278 rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
1206 *rdp->nxttail[RCU_DONE_TAIL] = NULL; 1279 *rdp->nxttail[RCU_DONE_TAIL] = NULL;
@@ -1216,13 +1289,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1216 next = list->next; 1289 next = list->next;
1217 prefetch(next); 1290 prefetch(next);
1218 debug_rcu_head_unqueue(list); 1291 debug_rcu_head_unqueue(list);
1219 __rcu_reclaim(list); 1292 __rcu_reclaim(rsp->name, list);
1220 list = next; 1293 list = next;
1221 if (++count >= rdp->blimit) 1294 if (++count >= bl)
1222 break; 1295 break;
1223 } 1296 }
1224 1297
1225 local_irq_save(flags); 1298 local_irq_save(flags);
1299 trace_rcu_batch_end(rsp->name, count);
1226 1300
1227 /* Update count, and requeue any remaining callbacks. */ 1301 /* Update count, and requeue any remaining callbacks. */
1228 rdp->qlen -= count; 1302 rdp->qlen -= count;
@@ -1250,7 +1324,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1250 1324
1251 local_irq_restore(flags); 1325 local_irq_restore(flags);
1252 1326
1253 /* Re-raise the RCU softirq if there are callbacks remaining. */ 1327 /* Re-invoke RCU core processing if there are callbacks remaining. */
1254 if (cpu_has_callbacks_ready_to_invoke(rdp)) 1328 if (cpu_has_callbacks_ready_to_invoke(rdp))
1255 invoke_rcu_core(); 1329 invoke_rcu_core();
1256} 1330}
@@ -1258,7 +1332,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1258/* 1332/*
1259 * Check to see if this CPU is in a non-context-switch quiescent state 1333 * Check to see if this CPU is in a non-context-switch quiescent state
1260 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). 1334 * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
1261 * Also schedule the RCU softirq handler. 1335 * Also schedule RCU core processing.
1262 * 1336 *
1263 * This function must be called with hardirqs disabled. It is normally 1337 * This function must be called with hardirqs disabled. It is normally
1264 * invoked from the scheduling-clock interrupt. If rcu_pending returns 1338 * invoked from the scheduling-clock interrupt. If rcu_pending returns
@@ -1266,6 +1340,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
1266 */ 1340 */
1267void rcu_check_callbacks(int cpu, int user) 1341void rcu_check_callbacks(int cpu, int user)
1268{ 1342{
1343 trace_rcu_utilization("Start scheduler-tick");
1269 if (user || 1344 if (user ||
1270 (idle_cpu(cpu) && rcu_scheduler_active && 1345 (idle_cpu(cpu) && rcu_scheduler_active &&
1271 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 1346 !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
@@ -1299,6 +1374,7 @@ void rcu_check_callbacks(int cpu, int user)
1299 rcu_preempt_check_callbacks(cpu); 1374 rcu_preempt_check_callbacks(cpu);
1300 if (rcu_pending(cpu)) 1375 if (rcu_pending(cpu))
1301 invoke_rcu_core(); 1376 invoke_rcu_core();
1377 trace_rcu_utilization("End scheduler-tick");
1302} 1378}
1303 1379
1304#ifdef CONFIG_SMP 1380#ifdef CONFIG_SMP
@@ -1360,10 +1436,14 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1360 unsigned long flags; 1436 unsigned long flags;
1361 struct rcu_node *rnp = rcu_get_root(rsp); 1437 struct rcu_node *rnp = rcu_get_root(rsp);
1362 1438
1363 if (!rcu_gp_in_progress(rsp)) 1439 trace_rcu_utilization("Start fqs");
1440 if (!rcu_gp_in_progress(rsp)) {
1441 trace_rcu_utilization("End fqs");
1364 return; /* No grace period in progress, nothing to force. */ 1442 return; /* No grace period in progress, nothing to force. */
1443 }
1365 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) { 1444 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
1366 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ 1445 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
1446 trace_rcu_utilization("End fqs");
1367 return; /* Someone else is already on the job. */ 1447 return; /* Someone else is already on the job. */
1368 } 1448 }
1369 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies)) 1449 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
@@ -1412,11 +1492,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1412 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */ 1492 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
1413 rsp->fqs_need_gp = 0; 1493 rsp->fqs_need_gp = 0;
1414 rcu_start_gp(rsp, flags); /* releases rnp->lock */ 1494 rcu_start_gp(rsp, flags); /* releases rnp->lock */
1495 trace_rcu_utilization("End fqs");
1415 return; 1496 return;
1416 } 1497 }
1417 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */ 1498 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1418unlock_fqs_ret: 1499unlock_fqs_ret:
1419 raw_spin_unlock_irqrestore(&rsp->fqslock, flags); 1500 raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
1501 trace_rcu_utilization("End fqs");
1420} 1502}
1421 1503
1422#else /* #ifdef CONFIG_SMP */ 1504#else /* #ifdef CONFIG_SMP */
@@ -1429,9 +1511,9 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1429#endif /* #else #ifdef CONFIG_SMP */ 1511#endif /* #else #ifdef CONFIG_SMP */
1430 1512
1431/* 1513/*
1432 * This does the RCU processing work from softirq context for the 1514 * This does the RCU core processing work for the specified rcu_state
1433 * specified rcu_state and rcu_data structures. This may be called 1515 * and rcu_data structures. This may be called only from the CPU to
1434 * only from the CPU to whom the rdp belongs. 1516 * whom the rdp belongs.
1435 */ 1517 */
1436static void 1518static void
1437__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1519__rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
@@ -1468,24 +1550,24 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1468} 1550}
1469 1551
1470/* 1552/*
1471 * Do softirq processing for the current CPU. 1553 * Do RCU core processing for the current CPU.
1472 */ 1554 */
1473static void rcu_process_callbacks(struct softirq_action *unused) 1555static void rcu_process_callbacks(struct softirq_action *unused)
1474{ 1556{
1557 trace_rcu_utilization("Start RCU core");
1475 __rcu_process_callbacks(&rcu_sched_state, 1558 __rcu_process_callbacks(&rcu_sched_state,
1476 &__get_cpu_var(rcu_sched_data)); 1559 &__get_cpu_var(rcu_sched_data));
1477 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); 1560 __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data));
1478 rcu_preempt_process_callbacks(); 1561 rcu_preempt_process_callbacks();
1479 1562 trace_rcu_utilization("End RCU core");
1480 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
1481 rcu_needs_cpu_flush();
1482} 1563}
1483 1564
1484/* 1565/*
1485 * Wake up the current CPU's kthread. This replaces raise_softirq() 1566 * Schedule RCU callback invocation. If the specified type of RCU
1486 * in earlier versions of RCU. Note that because we are running on 1567 * does not support RCU priority boosting, just do a direct call,
1487 * the current CPU with interrupts disabled, the rcu_cpu_kthread_task 1568 * otherwise wake up the per-CPU kernel kthread. Note that because we
1488 * cannot disappear out from under us. 1569 * are running on the current CPU with interrupts disabled, the
1570 * rcu_cpu_kthread_task cannot disappear out from under us.
1489 */ 1571 */
1490static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) 1572static void invoke_rcu_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1491{ 1573{
@@ -1530,6 +1612,12 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1530 rdp->nxttail[RCU_NEXT_TAIL] = &head->next; 1612 rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
1531 rdp->qlen++; 1613 rdp->qlen++;
1532 1614
1615 if (__is_kfree_rcu_offset((unsigned long)func))
1616 trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
1617 rdp->qlen);
1618 else
1619 trace_rcu_callback(rsp->name, head, rdp->qlen);
1620
1533 /* If interrupts were disabled, don't dive into RCU core. */ 1621 /* If interrupts were disabled, don't dive into RCU core. */
1534 if (irqs_disabled_flags(flags)) { 1622 if (irqs_disabled_flags(flags)) {
1535 local_irq_restore(flags); 1623 local_irq_restore(flags);
@@ -1613,18 +1701,9 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
1613 */ 1701 */
1614void synchronize_sched(void) 1702void synchronize_sched(void)
1615{ 1703{
1616 struct rcu_synchronize rcu;
1617
1618 if (rcu_blocking_is_gp()) 1704 if (rcu_blocking_is_gp())
1619 return; 1705 return;
1620 1706 wait_rcu_gp(call_rcu_sched);
1621 init_rcu_head_on_stack(&rcu.head);
1622 init_completion(&rcu.completion);
1623 /* Will wake me after RCU finished. */
1624 call_rcu_sched(&rcu.head, wakeme_after_rcu);
1625 /* Wait for it. */
1626 wait_for_completion(&rcu.completion);
1627 destroy_rcu_head_on_stack(&rcu.head);
1628} 1707}
1629EXPORT_SYMBOL_GPL(synchronize_sched); 1708EXPORT_SYMBOL_GPL(synchronize_sched);
1630 1709
@@ -1639,18 +1718,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
1639 */ 1718 */
1640void synchronize_rcu_bh(void) 1719void synchronize_rcu_bh(void)
1641{ 1720{
1642 struct rcu_synchronize rcu;
1643
1644 if (rcu_blocking_is_gp()) 1721 if (rcu_blocking_is_gp())
1645 return; 1722 return;
1646 1723 wait_rcu_gp(call_rcu_bh);
1647 init_rcu_head_on_stack(&rcu.head);
1648 init_completion(&rcu.completion);
1649 /* Will wake me after RCU finished. */
1650 call_rcu_bh(&rcu.head, wakeme_after_rcu);
1651 /* Wait for it. */
1652 wait_for_completion(&rcu.completion);
1653 destroy_rcu_head_on_stack(&rcu.head);
1654} 1724}
1655EXPORT_SYMBOL_GPL(synchronize_rcu_bh); 1725EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
1656 1726
@@ -1671,7 +1741,8 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1671 check_cpu_stall(rsp, rdp); 1741 check_cpu_stall(rsp, rdp);
1672 1742
1673 /* Is the RCU core waiting for a quiescent state from this CPU? */ 1743 /* Is the RCU core waiting for a quiescent state from this CPU? */
1674 if (rdp->qs_pending && !rdp->passed_quiesc) { 1744 if (rcu_scheduler_fully_active &&
1745 rdp->qs_pending && !rdp->passed_quiesce) {
1675 1746
1676 /* 1747 /*
1677 * If force_quiescent_state() coming soon and this CPU 1748 * If force_quiescent_state() coming soon and this CPU
@@ -1683,7 +1754,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1683 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1, 1754 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs) - 1,
1684 jiffies)) 1755 jiffies))
1685 set_need_resched(); 1756 set_need_resched();
1686 } else if (rdp->qs_pending && rdp->passed_quiesc) { 1757 } else if (rdp->qs_pending && rdp->passed_quiesce) {
1687 rdp->n_rp_report_qs++; 1758 rdp->n_rp_report_qs++;
1688 return 1; 1759 return 1;
1689 } 1760 }
@@ -1846,6 +1917,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1846 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 1917 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1847#endif /* #ifdef CONFIG_NO_HZ */ 1918#endif /* #ifdef CONFIG_NO_HZ */
1848 rdp->cpu = cpu; 1919 rdp->cpu = cpu;
1920 rdp->rsp = rsp;
1849 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1921 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1850} 1922}
1851 1923
@@ -1865,8 +1937,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1865 1937
1866 /* Set up local state, ensuring consistent view of global state. */ 1938 /* Set up local state, ensuring consistent view of global state. */
1867 raw_spin_lock_irqsave(&rnp->lock, flags); 1939 raw_spin_lock_irqsave(&rnp->lock, flags);
1868 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1869 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1870 rdp->beenonline = 1; /* We have now been online. */ 1940 rdp->beenonline = 1; /* We have now been online. */
1871 rdp->preemptible = preemptible; 1941 rdp->preemptible = preemptible;
1872 rdp->qlen_last_fqs_check = 0; 1942 rdp->qlen_last_fqs_check = 0;
@@ -1891,9 +1961,17 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
1891 rnp->qsmaskinit |= mask; 1961 rnp->qsmaskinit |= mask;
1892 mask = rnp->grpmask; 1962 mask = rnp->grpmask;
1893 if (rnp == rdp->mynode) { 1963 if (rnp == rdp->mynode) {
1894 rdp->gpnum = rnp->completed; /* if GP in progress... */ 1964 /*
1965 * If there is a grace period in progress, we will
1966 * set up to wait for it next time we run the
1967 * RCU core code.
1968 */
1969 rdp->gpnum = rnp->completed;
1895 rdp->completed = rnp->completed; 1970 rdp->completed = rnp->completed;
1896 rdp->passed_quiesc_completed = rnp->completed - 1; 1971 rdp->passed_quiesce = 0;
1972 rdp->qs_pending = 0;
1973 rdp->passed_quiesce_gpnum = rnp->gpnum - 1;
1974 trace_rcu_grace_period(rsp->name, rdp->gpnum, "cpuonl");
1897 } 1975 }
1898 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */ 1976 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
1899 rnp = rnp->parent; 1977 rnp = rnp->parent;
@@ -1919,6 +1997,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1919 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); 1997 struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
1920 struct rcu_node *rnp = rdp->mynode; 1998 struct rcu_node *rnp = rdp->mynode;
1921 1999
2000 trace_rcu_utilization("Start CPU hotplug");
1922 switch (action) { 2001 switch (action) {
1923 case CPU_UP_PREPARE: 2002 case CPU_UP_PREPARE:
1924 case CPU_UP_PREPARE_FROZEN: 2003 case CPU_UP_PREPARE_FROZEN:
@@ -1954,6 +2033,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1954 default: 2033 default:
1955 break; 2034 break;
1956 } 2035 }
2036 trace_rcu_utilization("End CPU hotplug");
1957 return NOTIFY_OK; 2037 return NOTIFY_OK;
1958} 2038}
1959 2039
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 01b2ccda26fb..849ce9ec51fe 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -230,9 +230,9 @@ struct rcu_data {
230 /* in order to detect GP end. */ 230 /* in order to detect GP end. */
231 unsigned long gpnum; /* Highest gp number that this CPU */ 231 unsigned long gpnum; /* Highest gp number that this CPU */
232 /* is aware of having started. */ 232 /* is aware of having started. */
233 unsigned long passed_quiesc_completed; 233 unsigned long passed_quiesce_gpnum;
234 /* Value of completed at time of qs. */ 234 /* gpnum at time of quiescent state. */
235 bool passed_quiesc; /* User-mode/idle loop etc. */ 235 bool passed_quiesce; /* User-mode/idle loop etc. */
236 bool qs_pending; /* Core waits for quiesc state. */ 236 bool qs_pending; /* Core waits for quiesc state. */
237 bool beenonline; /* CPU online at least once. */ 237 bool beenonline; /* CPU online at least once. */
238 bool preemptible; /* Preemptible RCU? */ 238 bool preemptible; /* Preemptible RCU? */
@@ -299,6 +299,7 @@ struct rcu_data {
299 unsigned long n_rp_need_nothing; 299 unsigned long n_rp_need_nothing;
300 300
301 int cpu; 301 int cpu;
302 struct rcu_state *rsp;
302}; 303};
303 304
304/* Values for signaled field in struct rcu_state. */ 305/* Values for signaled field in struct rcu_state. */
@@ -417,6 +418,13 @@ extern struct rcu_state rcu_preempt_state;
417DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data); 418DECLARE_PER_CPU(struct rcu_data, rcu_preempt_data);
418#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ 419#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
419 420
421#ifdef CONFIG_RCU_BOOST
422DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
423DECLARE_PER_CPU(int, rcu_cpu_kthread_cpu);
424DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
425DECLARE_PER_CPU(char, rcu_cpu_has_work);
426#endif /* #ifdef CONFIG_RCU_BOOST */
427
420#ifndef RCU_TREE_NONCORE 428#ifndef RCU_TREE_NONCORE
421 429
422/* Forward declarations for rcutree_plugin.h */ 430/* Forward declarations for rcutree_plugin.h */
@@ -430,7 +438,7 @@ static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp,
430static void rcu_stop_cpu_kthread(int cpu); 438static void rcu_stop_cpu_kthread(int cpu);
431#endif /* #ifdef CONFIG_HOTPLUG_CPU */ 439#endif /* #ifdef CONFIG_HOTPLUG_CPU */
432static void rcu_print_detail_task_stall(struct rcu_state *rsp); 440static void rcu_print_detail_task_stall(struct rcu_state *rsp);
433static void rcu_print_task_stall(struct rcu_node *rnp); 441static int rcu_print_task_stall(struct rcu_node *rnp);
434static void rcu_preempt_stall_reset(void); 442static void rcu_preempt_stall_reset(void);
435static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp); 443static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp);
436#ifdef CONFIG_HOTPLUG_CPU 444#ifdef CONFIG_HOTPLUG_CPU
@@ -450,7 +458,6 @@ static int rcu_preempt_needs_cpu(int cpu);
450static void __cpuinit rcu_preempt_init_percpu_data(int cpu); 458static void __cpuinit rcu_preempt_init_percpu_data(int cpu);
451static void rcu_preempt_send_cbs_to_online(void); 459static void rcu_preempt_send_cbs_to_online(void);
452static void __init __rcu_init_preempt(void); 460static void __init __rcu_init_preempt(void);
453static void rcu_needs_cpu_flush(void);
454static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); 461static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags);
455static void rcu_preempt_boost_start_gp(struct rcu_node *rnp); 462static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
456static void invoke_rcu_callbacks_kthread(void); 463static void invoke_rcu_callbacks_kthread(void);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 8aafbb80b8b0..4b9b9f8a4184 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -27,6 +27,14 @@
27#include <linux/delay.h> 27#include <linux/delay.h>
28#include <linux/stop_machine.h> 28#include <linux/stop_machine.h>
29 29
30#define RCU_KTHREAD_PRIO 1
31
32#ifdef CONFIG_RCU_BOOST
33#define RCU_BOOST_PRIO CONFIG_RCU_BOOST_PRIO
34#else
35#define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
36#endif
37
30/* 38/*
31 * Check the RCU kernel configuration parameters and print informative 39 * Check the RCU kernel configuration parameters and print informative
32 * messages about anything out of the ordinary. If you like #ifdef, you 40 * messages about anything out of the ordinary. If you like #ifdef, you
@@ -64,7 +72,7 @@ static void __init rcu_bootup_announce_oddness(void)
64 72
65#ifdef CONFIG_TREE_PREEMPT_RCU 73#ifdef CONFIG_TREE_PREEMPT_RCU
66 74
67struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt_state); 75struct rcu_state rcu_preempt_state = RCU_STATE_INITIALIZER(rcu_preempt);
68DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data); 76DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
69static struct rcu_state *rcu_state = &rcu_preempt_state; 77static struct rcu_state *rcu_state = &rcu_preempt_state;
70 78
@@ -122,9 +130,11 @@ static void rcu_preempt_qs(int cpu)
122{ 130{
123 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); 131 struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
124 132
125 rdp->passed_quiesc_completed = rdp->gpnum - 1; 133 rdp->passed_quiesce_gpnum = rdp->gpnum;
126 barrier(); 134 barrier();
127 rdp->passed_quiesc = 1; 135 if (rdp->passed_quiesce == 0)
136 trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
137 rdp->passed_quiesce = 1;
128 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; 138 current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
129} 139}
130 140
@@ -190,6 +200,11 @@ static void rcu_preempt_note_context_switch(int cpu)
190 if (rnp->qsmask & rdp->grpmask) 200 if (rnp->qsmask & rdp->grpmask)
191 rnp->gp_tasks = &t->rcu_node_entry; 201 rnp->gp_tasks = &t->rcu_node_entry;
192 } 202 }
203 trace_rcu_preempt_task(rdp->rsp->name,
204 t->pid,
205 (rnp->qsmask & rdp->grpmask)
206 ? rnp->gpnum
207 : rnp->gpnum + 1);
193 raw_spin_unlock_irqrestore(&rnp->lock, flags); 208 raw_spin_unlock_irqrestore(&rnp->lock, flags);
194 } else if (t->rcu_read_lock_nesting < 0 && 209 } else if (t->rcu_read_lock_nesting < 0 &&
195 t->rcu_read_unlock_special) { 210 t->rcu_read_unlock_special) {
@@ -299,6 +314,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
299 int empty_exp; 314 int empty_exp;
300 unsigned long flags; 315 unsigned long flags;
301 struct list_head *np; 316 struct list_head *np;
317#ifdef CONFIG_RCU_BOOST
318 struct rt_mutex *rbmp = NULL;
319#endif /* #ifdef CONFIG_RCU_BOOST */
302 struct rcu_node *rnp; 320 struct rcu_node *rnp;
303 int special; 321 int special;
304 322
@@ -344,6 +362,9 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
344 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */ 362 smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
345 np = rcu_next_node_entry(t, rnp); 363 np = rcu_next_node_entry(t, rnp);
346 list_del_init(&t->rcu_node_entry); 364 list_del_init(&t->rcu_node_entry);
365 t->rcu_blocked_node = NULL;
366 trace_rcu_unlock_preempted_task("rcu_preempt",
367 rnp->gpnum, t->pid);
347 if (&t->rcu_node_entry == rnp->gp_tasks) 368 if (&t->rcu_node_entry == rnp->gp_tasks)
348 rnp->gp_tasks = np; 369 rnp->gp_tasks = np;
349 if (&t->rcu_node_entry == rnp->exp_tasks) 370 if (&t->rcu_node_entry == rnp->exp_tasks)
@@ -351,30 +372,34 @@ static noinline void rcu_read_unlock_special(struct task_struct *t)
351#ifdef CONFIG_RCU_BOOST 372#ifdef CONFIG_RCU_BOOST
352 if (&t->rcu_node_entry == rnp->boost_tasks) 373 if (&t->rcu_node_entry == rnp->boost_tasks)
353 rnp->boost_tasks = np; 374 rnp->boost_tasks = np;
354 /* Snapshot and clear ->rcu_boosted with rcu_node lock held. */ 375 /* Snapshot/clear ->rcu_boost_mutex with rcu_node lock held. */
355 if (t->rcu_boosted) { 376 if (t->rcu_boost_mutex) {
356 special |= RCU_READ_UNLOCK_BOOSTED; 377 rbmp = t->rcu_boost_mutex;
357 t->rcu_boosted = 0; 378 t->rcu_boost_mutex = NULL;
358 } 379 }
359#endif /* #ifdef CONFIG_RCU_BOOST */ 380#endif /* #ifdef CONFIG_RCU_BOOST */
360 t->rcu_blocked_node = NULL;
361 381
362 /* 382 /*
363 * If this was the last task on the current list, and if 383 * If this was the last task on the current list, and if
364 * we aren't waiting on any CPUs, report the quiescent state. 384 * we aren't waiting on any CPUs, report the quiescent state.
365 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock. 385 * Note that rcu_report_unblock_qs_rnp() releases rnp->lock.
366 */ 386 */
367 if (empty) 387 if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
368 raw_spin_unlock_irqrestore(&rnp->lock, flags); 388 trace_rcu_quiescent_state_report("preempt_rcu",
369 else 389 rnp->gpnum,
390 0, rnp->qsmask,
391 rnp->level,
392 rnp->grplo,
393 rnp->grphi,
394 !!rnp->gp_tasks);
370 rcu_report_unblock_qs_rnp(rnp, flags); 395 rcu_report_unblock_qs_rnp(rnp, flags);
396 } else
397 raw_spin_unlock_irqrestore(&rnp->lock, flags);
371 398
372#ifdef CONFIG_RCU_BOOST 399#ifdef CONFIG_RCU_BOOST
373 /* Unboost if we were boosted. */ 400 /* Unboost if we were boosted. */
374 if (special & RCU_READ_UNLOCK_BOOSTED) { 401 if (rbmp)
375 rt_mutex_unlock(t->rcu_boost_mutex); 402 rt_mutex_unlock(rbmp);
376 t->rcu_boost_mutex = NULL;
377 }
378#endif /* #ifdef CONFIG_RCU_BOOST */ 403#endif /* #ifdef CONFIG_RCU_BOOST */
379 404
380 /* 405 /*
@@ -399,10 +424,10 @@ void __rcu_read_unlock(void)
399{ 424{
400 struct task_struct *t = current; 425 struct task_struct *t = current;
401 426
402 barrier(); /* needed if we ever invoke rcu_read_unlock in rcutree.c */
403 if (t->rcu_read_lock_nesting != 1) 427 if (t->rcu_read_lock_nesting != 1)
404 --t->rcu_read_lock_nesting; 428 --t->rcu_read_lock_nesting;
405 else { 429 else {
430 barrier(); /* critical section before exit code. */
406 t->rcu_read_lock_nesting = INT_MIN; 431 t->rcu_read_lock_nesting = INT_MIN;
407 barrier(); /* assign before ->rcu_read_unlock_special load */ 432 barrier(); /* assign before ->rcu_read_unlock_special load */
408 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) 433 if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special)))
@@ -466,16 +491,20 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
466 * Scan the current list of tasks blocked within RCU read-side critical 491 * Scan the current list of tasks blocked within RCU read-side critical
467 * sections, printing out the tid of each. 492 * sections, printing out the tid of each.
468 */ 493 */
469static void rcu_print_task_stall(struct rcu_node *rnp) 494static int rcu_print_task_stall(struct rcu_node *rnp)
470{ 495{
471 struct task_struct *t; 496 struct task_struct *t;
497 int ndetected = 0;
472 498
473 if (!rcu_preempt_blocked_readers_cgp(rnp)) 499 if (!rcu_preempt_blocked_readers_cgp(rnp))
474 return; 500 return 0;
475 t = list_entry(rnp->gp_tasks, 501 t = list_entry(rnp->gp_tasks,
476 struct task_struct, rcu_node_entry); 502 struct task_struct, rcu_node_entry);
477 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) 503 list_for_each_entry_continue(t, &rnp->blkd_tasks, rcu_node_entry) {
478 printk(" P%d", t->pid); 504 printk(" P%d", t->pid);
505 ndetected++;
506 }
507 return ndetected;
479} 508}
480 509
481/* 510/*
@@ -656,18 +685,9 @@ EXPORT_SYMBOL_GPL(call_rcu);
656 */ 685 */
657void synchronize_rcu(void) 686void synchronize_rcu(void)
658{ 687{
659 struct rcu_synchronize rcu;
660
661 if (!rcu_scheduler_active) 688 if (!rcu_scheduler_active)
662 return; 689 return;
663 690 wait_rcu_gp(call_rcu);
664 init_rcu_head_on_stack(&rcu.head);
665 init_completion(&rcu.completion);
666 /* Will wake me after RCU finished. */
667 call_rcu(&rcu.head, wakeme_after_rcu);
668 /* Wait for it. */
669 wait_for_completion(&rcu.completion);
670 destroy_rcu_head_on_stack(&rcu.head);
671} 691}
672EXPORT_SYMBOL_GPL(synchronize_rcu); 692EXPORT_SYMBOL_GPL(synchronize_rcu);
673 693
@@ -968,8 +988,9 @@ static void rcu_print_detail_task_stall(struct rcu_state *rsp)
968 * Because preemptible RCU does not exist, we never have to check for 988 * Because preemptible RCU does not exist, we never have to check for
969 * tasks blocked within RCU read-side critical sections. 989 * tasks blocked within RCU read-side critical sections.
970 */ 990 */
971static void rcu_print_task_stall(struct rcu_node *rnp) 991static int rcu_print_task_stall(struct rcu_node *rnp)
972{ 992{
993 return 0;
973} 994}
974 995
975/* 996/*
@@ -1136,6 +1157,8 @@ static void rcu_initiate_boost_trace(struct rcu_node *rnp)
1136 1157
1137#endif /* #else #ifdef CONFIG_RCU_TRACE */ 1158#endif /* #else #ifdef CONFIG_RCU_TRACE */
1138 1159
1160static struct lock_class_key rcu_boost_class;
1161
1139/* 1162/*
1140 * Carry out RCU priority boosting on the task indicated by ->exp_tasks 1163 * Carry out RCU priority boosting on the task indicated by ->exp_tasks
1141 * or ->boost_tasks, advancing the pointer to the next task in the 1164 * or ->boost_tasks, advancing the pointer to the next task in the
@@ -1198,8 +1221,10 @@ static int rcu_boost(struct rcu_node *rnp)
1198 */ 1221 */
1199 t = container_of(tb, struct task_struct, rcu_node_entry); 1222 t = container_of(tb, struct task_struct, rcu_node_entry);
1200 rt_mutex_init_proxy_locked(&mtx, t); 1223 rt_mutex_init_proxy_locked(&mtx, t);
1224 /* Avoid lockdep false positives. This rt_mutex is its own thing. */
1225 lockdep_set_class_and_name(&mtx.wait_lock, &rcu_boost_class,
1226 "rcu_boost_mutex");
1201 t->rcu_boost_mutex = &mtx; 1227 t->rcu_boost_mutex = &mtx;
1202 t->rcu_boosted = 1;
1203 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1228 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1204 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */ 1229 rt_mutex_lock(&mtx); /* Side effect: boosts task t's priority. */
1205 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */ 1230 rt_mutex_unlock(&mtx); /* Keep lockdep happy. */
@@ -1228,9 +1253,12 @@ static int rcu_boost_kthread(void *arg)
1228 int spincnt = 0; 1253 int spincnt = 0;
1229 int more2boost; 1254 int more2boost;
1230 1255
1256 trace_rcu_utilization("Start boost kthread@init");
1231 for (;;) { 1257 for (;;) {
1232 rnp->boost_kthread_status = RCU_KTHREAD_WAITING; 1258 rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
1259 trace_rcu_utilization("End boost kthread@rcu_wait");
1233 rcu_wait(rnp->boost_tasks || rnp->exp_tasks); 1260 rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
1261 trace_rcu_utilization("Start boost kthread@rcu_wait");
1234 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; 1262 rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
1235 more2boost = rcu_boost(rnp); 1263 more2boost = rcu_boost(rnp);
1236 if (more2boost) 1264 if (more2boost)
@@ -1238,11 +1266,14 @@ static int rcu_boost_kthread(void *arg)
1238 else 1266 else
1239 spincnt = 0; 1267 spincnt = 0;
1240 if (spincnt > 10) { 1268 if (spincnt > 10) {
1269 trace_rcu_utilization("End boost kthread@rcu_yield");
1241 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp); 1270 rcu_yield(rcu_boost_kthread_timer, (unsigned long)rnp);
1271 trace_rcu_utilization("Start boost kthread@rcu_yield");
1242 spincnt = 0; 1272 spincnt = 0;
1243 } 1273 }
1244 } 1274 }
1245 /* NOTREACHED */ 1275 /* NOTREACHED */
1276 trace_rcu_utilization("End boost kthread@notreached");
1246 return 0; 1277 return 0;
1247} 1278}
1248 1279
@@ -1291,11 +1322,9 @@ static void invoke_rcu_callbacks_kthread(void)
1291 1322
1292 local_irq_save(flags); 1323 local_irq_save(flags);
1293 __this_cpu_write(rcu_cpu_has_work, 1); 1324 __this_cpu_write(rcu_cpu_has_work, 1);
1294 if (__this_cpu_read(rcu_cpu_kthread_task) == NULL) { 1325 if (__this_cpu_read(rcu_cpu_kthread_task) != NULL &&
1295 local_irq_restore(flags); 1326 current != __this_cpu_read(rcu_cpu_kthread_task))
1296 return; 1327 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
1297 }
1298 wake_up_process(__this_cpu_read(rcu_cpu_kthread_task));
1299 local_irq_restore(flags); 1328 local_irq_restore(flags);
1300} 1329}
1301 1330
@@ -1343,13 +1372,13 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
1343 if (rnp->boost_kthread_task != NULL) 1372 if (rnp->boost_kthread_task != NULL)
1344 return 0; 1373 return 0;
1345 t = kthread_create(rcu_boost_kthread, (void *)rnp, 1374 t = kthread_create(rcu_boost_kthread, (void *)rnp,
1346 "rcub%d", rnp_index); 1375 "rcub/%d", rnp_index);
1347 if (IS_ERR(t)) 1376 if (IS_ERR(t))
1348 return PTR_ERR(t); 1377 return PTR_ERR(t);
1349 raw_spin_lock_irqsave(&rnp->lock, flags); 1378 raw_spin_lock_irqsave(&rnp->lock, flags);
1350 rnp->boost_kthread_task = t; 1379 rnp->boost_kthread_task = t;
1351 raw_spin_unlock_irqrestore(&rnp->lock, flags); 1380 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1352 sp.sched_priority = RCU_KTHREAD_PRIO; 1381 sp.sched_priority = RCU_BOOST_PRIO;
1353 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); 1382 sched_setscheduler_nocheck(t, SCHED_FIFO, &sp);
1354 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ 1383 wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */
1355 return 0; 1384 return 0;
@@ -1444,6 +1473,7 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1444{ 1473{
1445 struct sched_param sp; 1474 struct sched_param sp;
1446 struct timer_list yield_timer; 1475 struct timer_list yield_timer;
1476 int prio = current->rt_priority;
1447 1477
1448 setup_timer_on_stack(&yield_timer, f, arg); 1478 setup_timer_on_stack(&yield_timer, f, arg);
1449 mod_timer(&yield_timer, jiffies + 2); 1479 mod_timer(&yield_timer, jiffies + 2);
@@ -1451,7 +1481,8 @@ static void rcu_yield(void (*f)(unsigned long), unsigned long arg)
1451 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp); 1481 sched_setscheduler_nocheck(current, SCHED_NORMAL, &sp);
1452 set_user_nice(current, 19); 1482 set_user_nice(current, 19);
1453 schedule(); 1483 schedule();
1454 sp.sched_priority = RCU_KTHREAD_PRIO; 1484 set_user_nice(current, 0);
1485 sp.sched_priority = prio;
1455 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp); 1486 sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);
1456 del_timer(&yield_timer); 1487 del_timer(&yield_timer);
1457} 1488}
@@ -1489,7 +1520,8 @@ static int rcu_cpu_kthread_should_stop(int cpu)
1489 1520
1490/* 1521/*
1491 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the 1522 * Per-CPU kernel thread that invokes RCU callbacks. This replaces the
1492 * earlier RCU softirq. 1523 * RCU softirq used in flavors and configurations of RCU that do not
1524 * support RCU priority boosting.
1493 */ 1525 */
1494static int rcu_cpu_kthread(void *arg) 1526static int rcu_cpu_kthread(void *arg)
1495{ 1527{
@@ -1500,9 +1532,12 @@ static int rcu_cpu_kthread(void *arg)
1500 char work; 1532 char work;
1501 char *workp = &per_cpu(rcu_cpu_has_work, cpu); 1533 char *workp = &per_cpu(rcu_cpu_has_work, cpu);
1502 1534
1535 trace_rcu_utilization("Start CPU kthread@init");
1503 for (;;) { 1536 for (;;) {
1504 *statusp = RCU_KTHREAD_WAITING; 1537 *statusp = RCU_KTHREAD_WAITING;
1538 trace_rcu_utilization("End CPU kthread@rcu_wait");
1505 rcu_wait(*workp != 0 || kthread_should_stop()); 1539 rcu_wait(*workp != 0 || kthread_should_stop());
1540 trace_rcu_utilization("Start CPU kthread@rcu_wait");
1506 local_bh_disable(); 1541 local_bh_disable();
1507 if (rcu_cpu_kthread_should_stop(cpu)) { 1542 if (rcu_cpu_kthread_should_stop(cpu)) {
1508 local_bh_enable(); 1543 local_bh_enable();
@@ -1523,11 +1558,14 @@ static int rcu_cpu_kthread(void *arg)
1523 spincnt = 0; 1558 spincnt = 0;
1524 if (spincnt > 10) { 1559 if (spincnt > 10) {
1525 *statusp = RCU_KTHREAD_YIELDING; 1560 *statusp = RCU_KTHREAD_YIELDING;
1561 trace_rcu_utilization("End CPU kthread@rcu_yield");
1526 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu); 1562 rcu_yield(rcu_cpu_kthread_timer, (unsigned long)cpu);
1563 trace_rcu_utilization("Start CPU kthread@rcu_yield");
1527 spincnt = 0; 1564 spincnt = 0;
1528 } 1565 }
1529 } 1566 }
1530 *statusp = RCU_KTHREAD_STOPPED; 1567 *statusp = RCU_KTHREAD_STOPPED;
1568 trace_rcu_utilization("End CPU kthread@term");
1531 return 0; 1569 return 0;
1532} 1570}
1533 1571
@@ -1560,7 +1598,10 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu)
1560 if (!rcu_scheduler_fully_active || 1598 if (!rcu_scheduler_fully_active ||
1561 per_cpu(rcu_cpu_kthread_task, cpu) != NULL) 1599 per_cpu(rcu_cpu_kthread_task, cpu) != NULL)
1562 return 0; 1600 return 0;
1563 t = kthread_create(rcu_cpu_kthread, (void *)(long)cpu, "rcuc%d", cpu); 1601 t = kthread_create_on_node(rcu_cpu_kthread,
1602 (void *)(long)cpu,
1603 cpu_to_node(cpu),
1604 "rcuc/%d", cpu);
1564 if (IS_ERR(t)) 1605 if (IS_ERR(t))
1565 return PTR_ERR(t); 1606 return PTR_ERR(t);
1566 if (cpu_online(cpu)) 1607 if (cpu_online(cpu))
@@ -1669,7 +1710,7 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp,
1669 return 0; 1710 return 0;
1670 if (rnp->node_kthread_task == NULL) { 1711 if (rnp->node_kthread_task == NULL) {
1671 t = kthread_create(rcu_node_kthread, (void *)rnp, 1712 t = kthread_create(rcu_node_kthread, (void *)rnp,
1672 "rcun%d", rnp_index); 1713 "rcun/%d", rnp_index);
1673 if (IS_ERR(t)) 1714 if (IS_ERR(t))
1674 return PTR_ERR(t); 1715 return PTR_ERR(t);
1675 raw_spin_lock_irqsave(&rnp->lock, flags); 1716 raw_spin_lock_irqsave(&rnp->lock, flags);
@@ -1907,15 +1948,6 @@ int rcu_needs_cpu(int cpu)
1907 return rcu_needs_cpu_quick_check(cpu); 1948 return rcu_needs_cpu_quick_check(cpu);
1908} 1949}
1909 1950
1910/*
1911 * Check to see if we need to continue a callback-flush operations to
1912 * allow the last CPU to enter dyntick-idle mode. But fast dyntick-idle
1913 * entry is not configured, so we never do need to.
1914 */
1915static void rcu_needs_cpu_flush(void)
1916{
1917}
1918
1919#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 1951#else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */
1920 1952
1921#define RCU_NEEDS_CPU_FLUSHES 5 1953#define RCU_NEEDS_CPU_FLUSHES 5
@@ -1991,20 +2023,4 @@ int rcu_needs_cpu(int cpu)
1991 return c; 2023 return c;
1992} 2024}
1993 2025
1994/*
1995 * Check to see if we need to continue a callback-flush operations to
1996 * allow the last CPU to enter dyntick-idle mode.
1997 */
1998static void rcu_needs_cpu_flush(void)
1999{
2000 int cpu = smp_processor_id();
2001 unsigned long flags;
2002
2003 if (per_cpu(rcu_dyntick_drain, cpu) <= 0)
2004 return;
2005 local_irq_save(flags);
2006 (void)rcu_needs_cpu(cpu);
2007 local_irq_restore(flags);
2008}
2009
2010#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ 2026#endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 3b0c0986afc0..9feffa4c0695 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -48,11 +48,6 @@
48 48
49#ifdef CONFIG_RCU_BOOST 49#ifdef CONFIG_RCU_BOOST
50 50
51DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
52DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_cpu);
53DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_loops);
54DECLARE_PER_CPU(char, rcu_cpu_has_work);
55
56static char convert_kthread_status(unsigned int kthread_status) 51static char convert_kthread_status(unsigned int kthread_status)
57{ 52{
58 if (kthread_status > RCU_KTHREAD_MAX) 53 if (kthread_status > RCU_KTHREAD_MAX)
@@ -66,11 +61,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
66{ 61{
67 if (!rdp->beenonline) 62 if (!rdp->beenonline)
68 return; 63 return;
69 seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pqc=%lu qp=%d", 64 seq_printf(m, "%3d%cc=%lu g=%lu pq=%d pgp=%lu qp=%d",
70 rdp->cpu, 65 rdp->cpu,
71 cpu_is_offline(rdp->cpu) ? '!' : ' ', 66 cpu_is_offline(rdp->cpu) ? '!' : ' ',
72 rdp->completed, rdp->gpnum, 67 rdp->completed, rdp->gpnum,
73 rdp->passed_quiesc, rdp->passed_quiesc_completed, 68 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
74 rdp->qs_pending); 69 rdp->qs_pending);
75#ifdef CONFIG_NO_HZ 70#ifdef CONFIG_NO_HZ
76 seq_printf(m, " dt=%d/%d/%d df=%lu", 71 seq_printf(m, " dt=%d/%d/%d df=%lu",
@@ -144,7 +139,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
144 rdp->cpu, 139 rdp->cpu,
145 cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"", 140 cpu_is_offline(rdp->cpu) ? "\"N\"" : "\"Y\"",
146 rdp->completed, rdp->gpnum, 141 rdp->completed, rdp->gpnum,
147 rdp->passed_quiesc, rdp->passed_quiesc_completed, 142 rdp->passed_quiesce, rdp->passed_quiesce_gpnum,
148 rdp->qs_pending); 143 rdp->qs_pending);
149#ifdef CONFIG_NO_HZ 144#ifdef CONFIG_NO_HZ
150 seq_printf(m, ",%d,%d,%d,%lu", 145 seq_printf(m, ",%d,%d,%d,%lu",
@@ -175,7 +170,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
175 170
176static int show_rcudata_csv(struct seq_file *m, void *unused) 171static int show_rcudata_csv(struct seq_file *m, void *unused)
177{ 172{
178 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\","); 173 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pgp\",\"pq\",");
179#ifdef CONFIG_NO_HZ 174#ifdef CONFIG_NO_HZ
180 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\","); 175 seq_puts(m, "\"dt\",\"dt nesting\",\"dt NMI nesting\",\"df\",");
181#endif /* #ifdef CONFIG_NO_HZ */ 176#endif /* #ifdef CONFIG_NO_HZ */
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index 255e1662acdb..5e8d9cce7470 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -579,6 +579,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
579 struct rt_mutex_waiter *waiter) 579 struct rt_mutex_waiter *waiter)
580{ 580{
581 int ret = 0; 581 int ret = 0;
582 int was_disabled;
582 583
583 for (;;) { 584 for (;;) {
584 /* Try to acquire the lock: */ 585 /* Try to acquire the lock: */
@@ -601,10 +602,17 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state,
601 602
602 raw_spin_unlock(&lock->wait_lock); 603 raw_spin_unlock(&lock->wait_lock);
603 604
605 was_disabled = irqs_disabled();
606 if (was_disabled)
607 local_irq_enable();
608
604 debug_rt_mutex_print_deadlock(waiter); 609 debug_rt_mutex_print_deadlock(waiter);
605 610
606 schedule_rt_mutex(lock); 611 schedule_rt_mutex(lock);
607 612
613 if (was_disabled)
614 local_irq_disable();
615
608 raw_spin_lock(&lock->wait_lock); 616 raw_spin_lock(&lock->wait_lock);
609 set_current_state(state); 617 set_current_state(state);
610 } 618 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 8aa00803c1ec..03ad0113801a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4213,6 +4213,7 @@ static inline void schedule_debug(struct task_struct *prev)
4213 */ 4213 */
4214 if (unlikely(in_atomic_preempt_off() && !prev->exit_state)) 4214 if (unlikely(in_atomic_preempt_off() && !prev->exit_state))
4215 __schedule_bug(prev); 4215 __schedule_bug(prev);
4216 rcu_sleep_check();
4216 4217
4217 profile_hit(SCHED_PROFILING, __builtin_return_address(0)); 4218 profile_hit(SCHED_PROFILING, __builtin_return_address(0));
4218 4219
@@ -5955,15 +5956,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
5955} 5956}
5956 5957
5957/* 5958/*
5958 * In a system that switches off the HZ timer nohz_cpu_mask
5959 * indicates which cpus entered this state. This is used
5960 * in the rcu update to wait only for active cpus. For system
5961 * which do not switch off the HZ timer nohz_cpu_mask should
5962 * always be CPU_BITS_NONE.
5963 */
5964cpumask_var_t nohz_cpu_mask;
5965
5966/*
5967 * Increase the granularity value when there are more CPUs, 5959 * Increase the granularity value when there are more CPUs,
5968 * because with more CPUs the 'effective latency' as visible 5960 * because with more CPUs the 'effective latency' as visible
5969 * to users decreases. But the relationship is not linear, 5961 * to users decreases. But the relationship is not linear,
@@ -8175,8 +8167,6 @@ void __init sched_init(void)
8175 */ 8167 */
8176 current->sched_class = &fair_sched_class; 8168 current->sched_class = &fair_sched_class;
8177 8169
8178 /* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
8179 zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
8180#ifdef CONFIG_SMP 8170#ifdef CONFIG_SMP
8181 zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT); 8171 zalloc_cpumask_var(&sched_domains_tmpmask, GFP_NOWAIT);
8182#ifdef CONFIG_NO_HZ 8172#ifdef CONFIG_NO_HZ
@@ -8206,6 +8196,7 @@ void __might_sleep(const char *file, int line, int preempt_offset)
8206{ 8196{
8207 static unsigned long prev_jiffy; /* ratelimiting */ 8197 static unsigned long prev_jiffy; /* ratelimiting */
8208 8198
8199 rcu_sleep_check(); /* WARN_ON_ONCE() by default, no rate limit reqd. */
8209 if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) || 8200 if ((preempt_count_equals(preempt_offset) && !irqs_disabled()) ||
8210 system_state != SYSTEM_RUNNING || oops_in_progress) 8201 system_state != SYSTEM_RUNNING || oops_in_progress)
8211 return; 8202 return;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index d5097c44b407..eb98e55196b9 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -139,7 +139,6 @@ static void tick_nohz_update_jiffies(ktime_t now)
139 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); 139 struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
140 unsigned long flags; 140 unsigned long flags;
141 141
142 cpumask_clear_cpu(cpu, nohz_cpu_mask);
143 ts->idle_waketime = now; 142 ts->idle_waketime = now;
144 143
145 local_irq_save(flags); 144 local_irq_save(flags);
@@ -389,9 +388,6 @@ void tick_nohz_stop_sched_tick(int inidle)
389 else 388 else
390 expires.tv64 = KTIME_MAX; 389 expires.tv64 = KTIME_MAX;
391 390
392 if (delta_jiffies > 1)
393 cpumask_set_cpu(cpu, nohz_cpu_mask);
394
395 /* Skip reprogram of event if its not changed */ 391 /* Skip reprogram of event if its not changed */
396 if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) 392 if (ts->tick_stopped && ktime_equal(expires, dev->next_event))
397 goto out; 393 goto out;
@@ -441,7 +437,6 @@ void tick_nohz_stop_sched_tick(int inidle)
441 * softirq. 437 * softirq.
442 */ 438 */
443 tick_do_update_jiffies64(ktime_get()); 439 tick_do_update_jiffies64(ktime_get());
444 cpumask_clear_cpu(cpu, nohz_cpu_mask);
445 } 440 }
446 raise_softirq_irqoff(TIMER_SOFTIRQ); 441 raise_softirq_irqoff(TIMER_SOFTIRQ);
447out: 442out:
@@ -524,7 +519,6 @@ void tick_nohz_restart_sched_tick(void)
524 /* Update jiffies first */ 519 /* Update jiffies first */
525 select_nohz_load_balancer(0); 520 select_nohz_load_balancer(0);
526 tick_do_update_jiffies64(now); 521 tick_do_update_jiffies64(now);
527 cpumask_clear_cpu(cpu, nohz_cpu_mask);
528 522
529#ifndef CONFIG_VIRT_CPU_ACCOUNTING 523#ifndef CONFIG_VIRT_CPU_ACCOUNTING
530 /* 524 /*