aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/RCU/NMI-RCU.txt3
-rw-r--r--Documentation/RCU/RTFP.txt108
-rw-r--r--Documentation/RCU/checklist.txt89
-rw-r--r--Documentation/RCU/torture.txt48
-rw-r--r--Documentation/RCU/whatisRCU.txt58
-rw-r--r--MAINTAINERS5
-rw-r--r--arch/ia64/sn/kernel/irq.c1
-rw-r--r--crypto/async_tx/async_tx.c1
-rw-r--r--drivers/firewire/fw-card.c32
-rw-r--r--drivers/firewire/fw-device.c5
-rw-r--r--drivers/firewire/fw-device.h1
-rw-r--r--drivers/firewire/fw-ohci.c1
-rw-r--r--drivers/firewire/fw-sbp2.c25
-rw-r--r--drivers/firewire/fw-transaction.c32
-rw-r--r--drivers/firewire/fw-transaction.h34
-rw-r--r--drivers/ieee1394/csr1212.c32
-rw-r--r--drivers/ieee1394/dma.c2
-rw-r--r--drivers/ieee1394/highlevel.c4
-rw-r--r--drivers/ieee1394/highlevel.h13
-rw-r--r--drivers/ieee1394/raw1394.c20
-rw-r--r--drivers/ieee1394/sbp2.c22
-rw-r--r--drivers/ieee1394/sbp2.h1
-rw-r--r--drivers/ieee1394/video1394.c2
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs.c1
-rw-r--r--drivers/infiniband/hw/ipath/ipath_verbs_mcast.c3
-rw-r--r--drivers/net/macvlan.c2
-rw-r--r--drivers/scsi/sd.c5
-rw-r--r--include/asm-generic/pgtable.h2
-rw-r--r--include/linux/dcache.h1
-rw-r--r--include/linux/list.h367
-rw-r--r--include/linux/rcuclassic.h3
-rw-r--r--include/linux/rculist.h369
-rw-r--r--include/linux/rcupdate.h26
-rw-r--r--include/linux/rcupreempt.h42
-rw-r--r--include/scsi/scsi_device.h1
-rw-r--r--init/main.c1
-rw-r--r--kernel/pid.c1
-rw-r--r--kernel/rcuclassic.c34
-rw-r--r--kernel/rcupdate.c71
-rw-r--r--kernel/rcupreempt.c418
-rw-r--r--kernel/rcupreempt_trace.c1
-rw-r--r--kernel/rcutorture.c174
-rw-r--r--kernel/smp.c1
-rw-r--r--kernel/sysctl.c13
-rw-r--r--lib/Kconfig.debug20
-rw-r--r--lib/textsearch.c1
-rw-r--r--net/802/psnap.c1
-rw-r--r--net/8021q/vlan.c1
-rw-r--r--net/bridge/br_fdb.c1
-rw-r--r--net/bridge/br_stp.c1
-rw-r--r--net/netfilter/nf_conntrack_helper.c1
-rw-r--r--net/netfilter/nf_conntrack_netlink.c1
-rw-r--r--net/netlabel/netlabel_domainhash.c3
53 files changed, 1523 insertions, 582 deletions
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index c64158ecde43..a6d32e65d222 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -93,6 +93,9 @@ Since NMI handlers disable preemption, synchronize_sched() is guaranteed
93not to return until all ongoing NMI handlers exit. It is therefore safe 93not to return until all ongoing NMI handlers exit. It is therefore safe
94to free up the handler's data as soon as synchronize_sched() returns. 94to free up the handler's data as soon as synchronize_sched() returns.
95 95
96Important note: for this to work, the architecture in question must
97invoke irq_enter() and irq_exit() on NMI entry and exit, respectively.
98
96 99
97Answer to Quick Quiz 100Answer to Quick Quiz
98 101
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 39ad8f56783a..9f711d2df91b 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -52,6 +52,10 @@ of each iteration. Unfortunately, chaotic relaxation requires highly
52structured data, such as the matrices used in scientific programs, and 52structured data, such as the matrices used in scientific programs, and
53is thus inapplicable to most data structures in operating-system kernels. 53is thus inapplicable to most data structures in operating-system kernels.
54 54
55In 1992, Henry (now Alexia) Massalin completed a dissertation advising
56parallel programmers to defer processing when feasible to simplify
57synchronization. RCU makes extremely heavy use of this advice.
58
55In 1993, Jacobson [Jacobson93] verbally described what is perhaps the 59In 1993, Jacobson [Jacobson93] verbally described what is perhaps the
56simplest deferred-free technique: simply waiting a fixed amount of time 60simplest deferred-free technique: simply waiting a fixed amount of time
57before freeing blocks awaiting deferred free. Jacobson did not describe 61before freeing blocks awaiting deferred free. Jacobson did not describe
@@ -138,6 +142,13 @@ blocking in read-side critical sections appeared [PaulEMcKenney2006c],
138Robert Olsson described an RCU-protected trie-hash combination 142Robert Olsson described an RCU-protected trie-hash combination
139[RobertOlsson2006a]. 143[RobertOlsson2006a].
140 144
1452007 saw the journal version of the award-winning RCU paper from 2006
146[ThomasEHart2007a], as well as a paper demonstrating use of Promela
147and Spin to mechanically verify an optimization to Oleg Nesterov's
148QRCU [PaulEMcKenney2007QRCUspin], a design document describing
149preemptible RCU [PaulEMcKenney2007PreemptibleRCU], and the three-part
150LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally,
151PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI].
141 152
142Bibtex Entries 153Bibtex Entries
143 154
@@ -202,6 +213,20 @@ Bibtex Entries
202,Year="1991" 213,Year="1991"
203} 214}
204 215
216@phdthesis{HMassalinPhD
217,author="H. Massalin"
218,title="Synthesis: An Efficient Implementation of Fundamental Operating
219System Services"
220,school="Columbia University"
221,address="New York, NY"
222,year="1992"
223,annotation="
224 Mondo optimizing compiler.
225 Wait-free stuff.
226 Good advice: defer work to avoid synchronization.
227"
228}
229
205@unpublished{Jacobson93 230@unpublished{Jacobson93
206,author="Van Jacobson" 231,author="Van Jacobson"
207,title="Avoid Read-Side Locking Via Delayed Free" 232,title="Avoid Read-Side Locking Via Delayed Free"
@@ -635,3 +660,86 @@ Revised:
635" 660"
636} 661}
637 662
663@unpublished{PaulEMcKenney2007PreemptibleRCU
664,Author="Paul E. McKenney"
665,Title="The design of preemptible read-copy-update"
666,month="October"
667,day="8"
668,year="2007"
669,note="Available:
670\url{http://lwn.net/Articles/253651/}
671[Viewed October 25, 2007]"
672,annotation="
673 LWN article describing the design of preemptible RCU.
674"
675}
676
677########################################################################
678#
679# "What is RCU?" LWN series.
680#
681
682@unpublished{PaulEMcKenney2007WhatIsRCUFundamentally
683,Author="Paul E. McKenney and Jonathan Walpole"
684,Title="What is {RCU}, Fundamentally?"
685,month="December"
686,day="17"
687,year="2007"
688,note="Available:
689\url{http://lwn.net/Articles/262464/}
690[Viewed December 27, 2007]"
691,annotation="
692 Lays out the three basic components of RCU: (1) publish-subscribe,
693 (2) wait for pre-existing readers to complete, and (2) maintain
694 multiple versions.
695"
696}
697
698@unpublished{PaulEMcKenney2008WhatIsRCUUsage
699,Author="Paul E. McKenney"
700,Title="What is {RCU}? Part 2: Usage"
701,month="January"
702,day="4"
703,year="2008"
704,note="Available:
705\url{http://lwn.net/Articles/263130/}
706[Viewed January 4, 2008]"
707,annotation="
708 Lays out six uses of RCU:
709 1. RCU is a Reader-Writer Lock Replacement
710 2. RCU is a Restricted Reference-Counting Mechanism
711 3. RCU is a Bulk Reference-Counting Mechanism
712 4. RCU is a Poor Man's Garbage Collector
713 5. RCU is a Way of Providing Existence Guarantees
714 6. RCU is a Way of Waiting for Things to Finish
715"
716}
717
718@unpublished{PaulEMcKenney2008WhatIsRCUAPI
719,Author="Paul E. McKenney"
720,Title="{RCU} part 3: the {RCU} {API}"
721,month="January"
722,day="17"
723,year="2008"
724,note="Available:
725\url{http://lwn.net/Articles/264090/}
726[Viewed January 10, 2008]"
727,annotation="
728 Gives an overview of the Linux-kernel RCU API and a brief annotated RCU
729 bibliography.
730"
731}
732
733@article{DinakarGuniguntala2008IBMSysJ
734,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole"
735,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}"
736,Year="2008"
737,Month="April"
738,journal="IBM Systems Journal"
739,volume="47"
740,number="2"
741,pages="@@-@@"
742,annotation="
743 RCU, realtime RCU, sleepable RCU, performance.
744"
745}
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 42b01bc2e1b4..cf5562cbe356 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -13,10 +13,13 @@ over a rather long period of time, but improvements are always welcome!
13 detailed performance measurements show that RCU is nonetheless 13 detailed performance measurements show that RCU is nonetheless
14 the right tool for the job. 14 the right tool for the job.
15 15
16 The other exception would be where performance is not an issue, 16 Another exception is where performance is not an issue, and RCU
17 and RCU provides a simpler implementation. An example of this 17 provides a simpler implementation. An example of this situation
18 situation is the dynamic NMI code in the Linux 2.6 kernel, 18 is the dynamic NMI code in the Linux 2.6 kernel, at least on
19 at least on architectures where NMIs are rare. 19 architectures where NMIs are rare.
20
21 Yet another exception is where the low real-time latency of RCU's
22 read-side primitives is critically important.
20 23
211. Does the update code have proper mutual exclusion? 241. Does the update code have proper mutual exclusion?
22 25
@@ -39,9 +42,10 @@ over a rather long period of time, but improvements are always welcome!
39 42
402. Do the RCU read-side critical sections make proper use of 432. Do the RCU read-side critical sections make proper use of
41 rcu_read_lock() and friends? These primitives are needed 44 rcu_read_lock() and friends? These primitives are needed
42 to suppress preemption (or bottom halves, in the case of 45 to prevent grace periods from ending prematurely, which
43 rcu_read_lock_bh()) in the read-side critical sections, 46 could result in data being unceremoniously freed out from
44 and are also an excellent aid to readability. 47 under your read-side code, which can greatly increase the
48 actuarial risk of your kernel.
45 49
46 As a rough rule of thumb, any dereference of an RCU-protected 50 As a rough rule of thumb, any dereference of an RCU-protected
47 pointer must be covered by rcu_read_lock() or rcu_read_lock_bh() 51 pointer must be covered by rcu_read_lock() or rcu_read_lock_bh()
@@ -54,15 +58,30 @@ over a rather long period of time, but improvements are always welcome!
54 be running while updates are in progress. There are a number 58 be running while updates are in progress. There are a number
55 of ways to handle this concurrency, depending on the situation: 59 of ways to handle this concurrency, depending on the situation:
56 60
57 a. Make updates appear atomic to readers. For example, 61 a. Use the RCU variants of the list and hlist update
62 primitives to add, remove, and replace elements on an
63 RCU-protected list. Alternatively, use the RCU-protected
64 trees that have been added to the Linux kernel.
65
66 This is almost always the best approach.
67
68 b. Proceed as in (a) above, but also maintain per-element
69 locks (that are acquired by both readers and writers)
70 that guard per-element state. Of course, fields that
71 the readers refrain from accessing can be guarded by the
72 update-side lock.
73
74 This works quite well, also.
75
76 c. Make updates appear atomic to readers. For example,
58 pointer updates to properly aligned fields will appear 77 pointer updates to properly aligned fields will appear
59 atomic, as will individual atomic primitives. Operations 78 atomic, as will individual atomic primitives. Operations
60 performed under a lock and sequences of multiple atomic 79 performed under a lock and sequences of multiple atomic
61 primitives will -not- appear to be atomic. 80 primitives will -not- appear to be atomic.
62 81
63 This is almost always the best approach. 82 This can work, but is starting to get a bit tricky.
64 83
65 b. Carefully order the updates and the reads so that 84 d. Carefully order the updates and the reads so that
66 readers see valid data at all phases of the update. 85 readers see valid data at all phases of the update.
67 This is often more difficult than it sounds, especially 86 This is often more difficult than it sounds, especially
68 given modern CPUs' tendency to reorder memory references. 87 given modern CPUs' tendency to reorder memory references.
@@ -123,18 +142,22 @@ over a rather long period of time, but improvements are always welcome!
123 when publicizing a pointer to a structure that can 142 when publicizing a pointer to a structure that can
124 be traversed by an RCU read-side critical section. 143 be traversed by an RCU read-side critical section.
125 144
1265. If call_rcu(), or a related primitive such as call_rcu_bh(), 1455. If call_rcu(), or a related primitive such as call_rcu_bh() or
127 is used, the callback function must be written to be called 146 call_rcu_sched(), is used, the callback function must be
128 from softirq context. In particular, it cannot block. 147 written to be called from softirq context. In particular,
148 it cannot block.
129 149
1306. Since synchronize_rcu() can block, it cannot be called from 1506. Since synchronize_rcu() can block, it cannot be called from
131 any sort of irq context. 151 any sort of irq context. Ditto for synchronize_sched() and
152 synchronize_srcu().
132 153
1337. If the updater uses call_rcu(), then the corresponding readers 1547. If the updater uses call_rcu(), then the corresponding readers
134 must use rcu_read_lock() and rcu_read_unlock(). If the updater 155 must use rcu_read_lock() and rcu_read_unlock(). If the updater
135 uses call_rcu_bh(), then the corresponding readers must use 156 uses call_rcu_bh(), then the corresponding readers must use
136 rcu_read_lock_bh() and rcu_read_unlock_bh(). Mixing things up 157 rcu_read_lock_bh() and rcu_read_unlock_bh(). If the updater
137 will result in confusion and broken kernels. 158 uses call_rcu_sched(), then the corresponding readers must
159 disable preemption. Mixing things up will result in confusion
160 and broken kernels.
138 161
139 One exception to this rule: rcu_read_lock() and rcu_read_unlock() 162 One exception to this rule: rcu_read_lock() and rcu_read_unlock()
140 may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() 163 may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
@@ -143,9 +166,9 @@ over a rather long period of time, but improvements are always welcome!
143 such cases is a must, of course! And the jury is still out on 166 such cases is a must, of course! And the jury is still out on
144 whether the increased speed is worth it. 167 whether the increased speed is worth it.
145 168
1468. Although synchronize_rcu() is a bit slower than is call_rcu(), 1698. Although synchronize_rcu() is slower than is call_rcu(), it
147 it usually results in simpler code. So, unless update 170 usually results in simpler code. So, unless update performance
148 performance is critically important or the updaters cannot block, 171 is critically important or the updaters cannot block,
149 synchronize_rcu() should be used in preference to call_rcu(). 172 synchronize_rcu() should be used in preference to call_rcu().
150 173
151 An especially important property of the synchronize_rcu() 174 An especially important property of the synchronize_rcu()
@@ -187,23 +210,23 @@ over a rather long period of time, but improvements are always welcome!
187 number of updates per grace period. 210 number of updates per grace period.
188 211
1899. All RCU list-traversal primitives, which include 2129. All RCU list-traversal primitives, which include
190 list_for_each_rcu(), list_for_each_entry_rcu(), 213 rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(),
191 list_for_each_continue_rcu(), and list_for_each_safe_rcu(), 214 list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
192 must be within an RCU read-side critical section. RCU 215 must be either within an RCU read-side critical section or
216 must be protected by appropriate update-side locks. RCU
193 read-side critical sections are delimited by rcu_read_lock() 217 read-side critical sections are delimited by rcu_read_lock()
194 and rcu_read_unlock(), or by similar primitives such as 218 and rcu_read_unlock(), or by similar primitives such as
195 rcu_read_lock_bh() and rcu_read_unlock_bh(). 219 rcu_read_lock_bh() and rcu_read_unlock_bh().
196 220
197 Use of the _rcu() list-traversal primitives outside of an 221 The reason that it is permissible to use RCU list-traversal
198 RCU read-side critical section causes no harm other than 222 primitives when the update-side lock is held is that doing so
199 a slight performance degradation on Alpha CPUs. It can 223 can be quite helpful in reducing code bloat when common code is
200 also be quite helpful in reducing code bloat when common 224 shared between readers and updaters.
201 code is shared between readers and updaters.
202 225
20310. Conversely, if you are in an RCU read-side critical section, 22610. Conversely, if you are in an RCU read-side critical section,
204 you -must- use the "_rcu()" variants of the list macros. 227 and you don't hold the appropriate update-side lock, you -must-
205 Failing to do so will break Alpha and confuse people reading 228 use the "_rcu()" variants of the list macros. Failing to do so
206 your code. 229 will break Alpha and confuse people reading your code.
207 230
20811. Note that synchronize_rcu() -only- guarantees to wait until 23111. Note that synchronize_rcu() -only- guarantees to wait until
209 all currently executing rcu_read_lock()-protected RCU read-side 232 all currently executing rcu_read_lock()-protected RCU read-side
@@ -230,6 +253,14 @@ over a rather long period of time, but improvements are always welcome!
230 must use whatever locking or other synchronization is required 253 must use whatever locking or other synchronization is required
231 to safely access and/or modify that data structure. 254 to safely access and/or modify that data structure.
232 255
256 RCU callbacks are -usually- executed on the same CPU that executed
257 the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(),
258 but are by -no- means guaranteed to be. For example, if a given
259 CPU goes offline while having an RCU callback pending, then that
260 RCU callback will execute on some surviving CPU. (If this was
261 not the case, a self-spawning RCU callback would prevent the
262 victim CPU from ever going offline.)
263
23314. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu()) 26414. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu())
234 may only be invoked from process context. Unlike other forms of 265 may only be invoked from process context. Unlike other forms of
235 RCU, it -is- permissible to block in an SRCU read-side critical 266 RCU, it -is- permissible to block in an SRCU read-side critical
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 2967a65269d8..a342b6e1cc10 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -10,23 +10,30 @@ status messages via printk(), which can be examined via the dmesg
10command (perhaps grepping for "torture"). The test is started 10command (perhaps grepping for "torture"). The test is started
11when the module is loaded, and stops when the module is unloaded. 11when the module is loaded, and stops when the module is unloaded.
12 12
13However, actually setting this config option to "y" results in the system 13CONFIG_RCU_TORTURE_TEST_RUNNABLE
14running the test immediately upon boot, and ending only when the system 14
15is taken down. Normally, one will instead want to build the system 15It is also possible to specify CONFIG_RCU_TORTURE_TEST=y, which will
16with CONFIG_RCU_TORTURE_TEST=m and to use modprobe and rmmod to control 16result in the tests being loaded into the base kernel. In this case,
17the test, perhaps using a script similar to the one shown at the end of 17the CONFIG_RCU_TORTURE_TEST_RUNNABLE config option is used to specify
18this document. Note that you will need CONFIG_MODULE_UNLOAD in order 18whether the RCU torture tests are to be started immediately during
19to be able to end the test. 19boot or whether the /proc/sys/kernel/rcutorture_runnable file is used
20to enable them. This /proc file can be used to repeatedly pause and
21restart the tests, regardless of the initial state specified by the
22CONFIG_RCU_TORTURE_TEST_RUNNABLE config option.
23
24You will normally -not- want to start the RCU torture tests during boot
25(and thus the default is CONFIG_RCU_TORTURE_TEST_RUNNABLE=n), but doing
26this can sometimes be useful in finding boot-time bugs.
20 27
21 28
22MODULE PARAMETERS 29MODULE PARAMETERS
23 30
24This module has the following parameters: 31This module has the following parameters:
25 32
26nreaders This is the number of RCU reading threads supported. 33irqreaders Says to invoke RCU readers from irq level. This is currently
27 The default is twice the number of CPUs. Why twice? 34 done via timers. Defaults to "1" for variants of RCU that
28 To properly exercise RCU implementations with preemptible 35 permit this. (Or, more accurately, variants of RCU that do
29 read-side critical sections. 36 -not- permit this know to ignore this variable.)
30 37
31nfakewriters This is the number of RCU fake writer threads to run. Fake 38nfakewriters This is the number of RCU fake writer threads to run. Fake
32 writer threads repeatedly use the synchronous "wait for 39 writer threads repeatedly use the synchronous "wait for
@@ -37,6 +44,16 @@ nfakewriters This is the number of RCU fake writer threads to run. Fake
37 to trigger special cases caused by multiple writers, such as 44 to trigger special cases caused by multiple writers, such as
38 the synchronize_srcu() early return optimization. 45 the synchronize_srcu() early return optimization.
39 46
47nreaders This is the number of RCU reading threads supported.
48 The default is twice the number of CPUs. Why twice?
49 To properly exercise RCU implementations with preemptible
50 read-side critical sections.
51
52shuffle_interval
53 The number of seconds to keep the test threads affinitied
54 to a particular subset of the CPUs, defaults to 3 seconds.
55 Used in conjunction with test_no_idle_hz.
56
40stat_interval The number of seconds between output of torture 57stat_interval The number of seconds between output of torture
41 statistics (via printk()). Regardless of the interval, 58 statistics (via printk()). Regardless of the interval,
42 statistics are printed when the module is unloaded. 59 statistics are printed when the module is unloaded.
@@ -44,10 +61,11 @@ stat_interval The number of seconds between output of torture
44 be printed -only- when the module is unloaded, and this 61 be printed -only- when the module is unloaded, and this
45 is the default. 62 is the default.
46 63
47shuffle_interval 64stutter The length of time to run the test before pausing for this
48 The number of seconds to keep the test threads affinitied 65 same period of time. Defaults to "stutter=5", so as
49 to a particular subset of the CPUs, defaults to 5 seconds. 66 to run and pause for (roughly) five-second intervals.
50 Used in conjunction with test_no_idle_hz. 67 Specifying "stutter=0" causes the test to run continuously
68 without pausing, which is the old default behavior.
51 69
52test_no_idle_hz Whether or not to test the ability of RCU to operate in 70test_no_idle_hz Whether or not to test the ability of RCU to operate in
53 a kernel that disables the scheduling-clock interrupt to 71 a kernel that disables the scheduling-clock interrupt to
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index e0d6d99b8f9b..e04d643a9f57 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -1,3 +1,11 @@
1Please note that the "What is RCU?" LWN series is an excellent place
2to start learning about RCU:
3
41. What is RCU, Fundamentally? http://lwn.net/Articles/262464/
52. What is RCU? Part 2: Usage http://lwn.net/Articles/263130/
63. RCU part 3: the RCU API http://lwn.net/Articles/264090/
7
8
1What is RCU? 9What is RCU?
2 10
3RCU is a synchronization mechanism that was added to the Linux kernel 11RCU is a synchronization mechanism that was added to the Linux kernel
@@ -772,26 +780,18 @@ Linux-kernel source code, but it helps to have a full list of the
772APIs, since there does not appear to be a way to categorize them 780APIs, since there does not appear to be a way to categorize them
773in docbook. Here is the list, by category. 781in docbook. Here is the list, by category.
774 782
775Markers for RCU read-side critical sections:
776
777 rcu_read_lock
778 rcu_read_unlock
779 rcu_read_lock_bh
780 rcu_read_unlock_bh
781 srcu_read_lock
782 srcu_read_unlock
783
784RCU pointer/list traversal: 783RCU pointer/list traversal:
785 784
786 rcu_dereference 785 rcu_dereference
786 list_for_each_entry_rcu
787 hlist_for_each_entry_rcu
788
787 list_for_each_rcu (to be deprecated in favor of 789 list_for_each_rcu (to be deprecated in favor of
788 list_for_each_entry_rcu) 790 list_for_each_entry_rcu)
789 list_for_each_entry_rcu
790 list_for_each_continue_rcu (to be deprecated in favor of new 791 list_for_each_continue_rcu (to be deprecated in favor of new
791 list_for_each_entry_continue_rcu) 792 list_for_each_entry_continue_rcu)
792 hlist_for_each_entry_rcu
793 793
794RCU pointer update: 794RCU pointer/list update:
795 795
796 rcu_assign_pointer 796 rcu_assign_pointer
797 list_add_rcu 797 list_add_rcu
@@ -799,16 +799,36 @@ RCU pointer update:
799 list_del_rcu 799 list_del_rcu
800 list_replace_rcu 800 list_replace_rcu
801 hlist_del_rcu 801 hlist_del_rcu
802 hlist_add_after_rcu
803 hlist_add_before_rcu
802 hlist_add_head_rcu 804 hlist_add_head_rcu
805 hlist_replace_rcu
806 list_splice_init_rcu()
803 807
804RCU grace period: 808RCU: Critical sections Grace period Barrier
809
810 rcu_read_lock synchronize_net rcu_barrier
811 rcu_read_unlock synchronize_rcu
812 call_rcu
813
814
815bh: Critical sections Grace period Barrier
816
817 rcu_read_lock_bh call_rcu_bh rcu_barrier_bh
818 rcu_read_unlock_bh
819
820
821sched: Critical sections Grace period Barrier
822
823 [preempt_disable] synchronize_sched rcu_barrier_sched
824 [and friends] call_rcu_sched
825
826
827SRCU: Critical sections Grace period Barrier
828
829 srcu_read_lock synchronize_srcu N/A
830 srcu_read_unlock
805 831
806 synchronize_net
807 synchronize_sched
808 synchronize_rcu
809 synchronize_srcu
810 call_rcu
811 call_rcu_bh
812 832
813See the comment headers in the source code (or the docbook generated 833See the comment headers in the source code (or the docbook generated
814from them) for more information. 834from them) for more information.
diff --git a/MAINTAINERS b/MAINTAINERS
index 6198fa3deb99..fc6c005c531a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1777,6 +1777,11 @@ M: hch@infradead.org
1777W: ftp://ftp.openlinux.org/pub/people/hch/vxfs 1777W: ftp://ftp.openlinux.org/pub/people/hch/vxfs
1778S: Maintained 1778S: Maintained
1779 1779
1780FTRACE
1781P: Steven Rostedt
1782M: srostedt@redhat.com
1783S: Maintained
1784
1780FUJITSU FR-V (FRV) PORT 1785FUJITSU FR-V (FRV) PORT
1781P: David Howells 1786P: David Howells
1782M: dhowells@redhat.com 1787M: dhowells@redhat.com
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 53351c3cd7b1..96c31b4180c3 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -11,6 +11,7 @@
11#include <linux/irq.h> 11#include <linux/irq.h>
12#include <linux/spinlock.h> 12#include <linux/spinlock.h>
13#include <linux/init.h> 13#include <linux/init.h>
14#include <linux/rculist.h>
14#include <asm/sn/addrs.h> 15#include <asm/sn/addrs.h>
15#include <asm/sn/arch.h> 16#include <asm/sn/arch.h>
16#include <asm/sn/intr.h> 17#include <asm/sn/intr.h>
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index c6e772fc5ccd..095c798d3170 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -23,6 +23,7 @@
23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 23 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
24 * 24 *
25 */ 25 */
26#include <linux/rculist.h>
26#include <linux/kernel.h> 27#include <linux/kernel.h>
27#include <linux/async_tx.h> 28#include <linux/async_tx.h>
28 29
diff --git a/drivers/firewire/fw-card.c b/drivers/firewire/fw-card.c
index 5b4c0d9f5173..da873d795aad 100644
--- a/drivers/firewire/fw-card.c
+++ b/drivers/firewire/fw-card.c
@@ -16,12 +16,15 @@
16 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
17 */ 17 */
18 18
19#include <linux/module.h> 19#include <linux/completion.h>
20#include <linux/errno.h> 20#include <linux/crc-itu-t.h>
21#include <linux/delay.h> 21#include <linux/delay.h>
22#include <linux/device.h> 22#include <linux/device.h>
23#include <linux/errno.h>
24#include <linux/kref.h>
25#include <linux/module.h>
23#include <linux/mutex.h> 26#include <linux/mutex.h>
24#include <linux/crc-itu-t.h> 27
25#include "fw-transaction.h" 28#include "fw-transaction.h"
26#include "fw-topology.h" 29#include "fw-topology.h"
27#include "fw-device.h" 30#include "fw-device.h"
@@ -396,14 +399,16 @@ fw_card_initialize(struct fw_card *card, const struct fw_card_driver *driver,
396{ 399{
397 static atomic_t index = ATOMIC_INIT(-1); 400 static atomic_t index = ATOMIC_INIT(-1);
398 401
399 atomic_set(&card->device_count, 0);
400 card->index = atomic_inc_return(&index); 402 card->index = atomic_inc_return(&index);
401 card->driver = driver; 403 card->driver = driver;
402 card->device = device; 404 card->device = device;
403 card->current_tlabel = 0; 405 card->current_tlabel = 0;
404 card->tlabel_mask = 0; 406 card->tlabel_mask = 0;
405 card->color = 0; 407 card->color = 0;
408 card->broadcast_channel = BROADCAST_CHANNEL_INITIAL;
406 409
410 kref_init(&card->kref);
411 init_completion(&card->done);
407 INIT_LIST_HEAD(&card->transaction_list); 412 INIT_LIST_HEAD(&card->transaction_list);
408 spin_lock_init(&card->lock); 413 spin_lock_init(&card->lock);
409 setup_timer(&card->flush_timer, 414 setup_timer(&card->flush_timer,
@@ -496,7 +501,6 @@ dummy_enable_phys_dma(struct fw_card *card,
496} 501}
497 502
498static struct fw_card_driver dummy_driver = { 503static struct fw_card_driver dummy_driver = {
499 .name = "dummy",
500 .enable = dummy_enable, 504 .enable = dummy_enable,
501 .update_phy_reg = dummy_update_phy_reg, 505 .update_phy_reg = dummy_update_phy_reg,
502 .set_config_rom = dummy_set_config_rom, 506 .set_config_rom = dummy_set_config_rom,
@@ -507,6 +511,14 @@ static struct fw_card_driver dummy_driver = {
507}; 511};
508 512
509void 513void
514fw_card_release(struct kref *kref)
515{
516 struct fw_card *card = container_of(kref, struct fw_card, kref);
517
518 complete(&card->done);
519}
520
521void
510fw_core_remove_card(struct fw_card *card) 522fw_core_remove_card(struct fw_card *card)
511{ 523{
512 card->driver->update_phy_reg(card, 4, 524 card->driver->update_phy_reg(card, 4,
@@ -521,12 +533,10 @@ fw_core_remove_card(struct fw_card *card)
521 card->driver = &dummy_driver; 533 card->driver = &dummy_driver;
522 534
523 fw_destroy_nodes(card); 535 fw_destroy_nodes(card);
524 /* 536
525 * Wait for all device workqueue jobs to finish. Otherwise the 537 /* Wait for all users, especially device workqueue jobs, to finish. */
526 * firewire-core module could be unloaded before the jobs ran. 538 fw_card_put(card);
527 */ 539 wait_for_completion(&card->done);
528 while (atomic_read(&card->device_count) > 0)
529 msleep(100);
530 540
531 cancel_delayed_work_sync(&card->work); 541 cancel_delayed_work_sync(&card->work);
532 fw_flush_transactions(card); 542 fw_flush_transactions(card);
diff --git a/drivers/firewire/fw-device.c b/drivers/firewire/fw-device.c
index d9c8daf7ae7d..0855fb5568e8 100644
--- a/drivers/firewire/fw-device.c
+++ b/drivers/firewire/fw-device.c
@@ -168,7 +168,7 @@ static void fw_device_release(struct device *dev)
168 fw_node_put(device->node); 168 fw_node_put(device->node);
169 kfree(device->config_rom); 169 kfree(device->config_rom);
170 kfree(device); 170 kfree(device);
171 atomic_dec(&card->device_count); 171 fw_card_put(card);
172} 172}
173 173
174int fw_device_enable_phys_dma(struct fw_device *device) 174int fw_device_enable_phys_dma(struct fw_device *device)
@@ -946,8 +946,7 @@ void fw_node_event(struct fw_card *card, struct fw_node *node, int event)
946 */ 946 */
947 device_initialize(&device->device); 947 device_initialize(&device->device);
948 atomic_set(&device->state, FW_DEVICE_INITIALIZING); 948 atomic_set(&device->state, FW_DEVICE_INITIALIZING);
949 atomic_inc(&card->device_count); 949 device->card = fw_card_get(card);
950 device->card = card;
951 device->node = fw_node_get(node); 950 device->node = fw_node_get(node);
952 device->node_id = node->node_id; 951 device->node_id = node->node_id;
953 device->generation = card->generation; 952 device->generation = card->generation;
diff --git a/drivers/firewire/fw-device.h b/drivers/firewire/fw-device.h
index 5f131f5129da..42305bbac72f 100644
--- a/drivers/firewire/fw-device.h
+++ b/drivers/firewire/fw-device.h
@@ -62,7 +62,6 @@ struct fw_device {
62 bool cmc; 62 bool cmc;
63 struct fw_card *card; 63 struct fw_card *card;
64 struct device device; 64 struct device device;
65 struct list_head link;
66 struct list_head client_list; 65 struct list_head client_list;
67 u32 *config_rom; 66 u32 *config_rom;
68 size_t config_rom_length; 67 size_t config_rom_length;
diff --git a/drivers/firewire/fw-ohci.c b/drivers/firewire/fw-ohci.c
index 0b66306af479..333b12544dd1 100644
--- a/drivers/firewire/fw-ohci.c
+++ b/drivers/firewire/fw-ohci.c
@@ -2292,7 +2292,6 @@ ohci_queue_iso(struct fw_iso_context *base,
2292} 2292}
2293 2293
2294static const struct fw_card_driver ohci_driver = { 2294static const struct fw_card_driver ohci_driver = {
2295 .name = ohci_driver_name,
2296 .enable = ohci_enable, 2295 .enable = ohci_enable,
2297 .update_phy_reg = ohci_update_phy_reg, 2296 .update_phy_reg = ohci_update_phy_reg,
2298 .set_config_rom = ohci_set_config_rom, 2297 .set_config_rom = ohci_set_config_rom,
diff --git a/drivers/firewire/fw-sbp2.c b/drivers/firewire/fw-sbp2.c
index 227d2e036cd8..53fc5a641e6d 100644
--- a/drivers/firewire/fw-sbp2.c
+++ b/drivers/firewire/fw-sbp2.c
@@ -86,6 +86,11 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
86 * - delay inquiry 86 * - delay inquiry
87 * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry. 87 * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
88 * 88 *
89 * - power condition
90 * Set the power condition field in the START STOP UNIT commands sent by
91 * sd_mod on suspend, resume, and shutdown (if manage_start_stop is on).
92 * Some disks need this to spin down or to resume properly.
93 *
89 * - override internal blacklist 94 * - override internal blacklist
90 * Instead of adding to the built-in blacklist, use only the workarounds 95 * Instead of adding to the built-in blacklist, use only the workarounds
91 * specified in the module load parameter. 96 * specified in the module load parameter.
@@ -97,6 +102,7 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
97#define SBP2_WORKAROUND_FIX_CAPACITY 0x8 102#define SBP2_WORKAROUND_FIX_CAPACITY 0x8
98#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10 103#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10
99#define SBP2_INQUIRY_DELAY 12 104#define SBP2_INQUIRY_DELAY 12
105#define SBP2_WORKAROUND_POWER_CONDITION 0x20
100#define SBP2_WORKAROUND_OVERRIDE 0x100 106#define SBP2_WORKAROUND_OVERRIDE 0x100
101 107
102static int sbp2_param_workarounds; 108static int sbp2_param_workarounds;
@@ -107,6 +113,8 @@ MODULE_PARM_DESC(workarounds, "Work around device bugs (default = 0"
107 ", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8) 113 ", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
108 ", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY) 114 ", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
109 ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY) 115 ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
116 ", set power condition in start stop unit = "
117 __stringify(SBP2_WORKAROUND_POWER_CONDITION)
110 ", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE) 118 ", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
111 ", or a combination)"); 119 ", or a combination)");
112 120
@@ -310,18 +318,25 @@ static const struct {
310 .firmware_revision = 0x002800, 318 .firmware_revision = 0x002800,
311 .model = 0x001010, 319 .model = 0x001010,
312 .workarounds = SBP2_WORKAROUND_INQUIRY_36 | 320 .workarounds = SBP2_WORKAROUND_INQUIRY_36 |
313 SBP2_WORKAROUND_MODE_SENSE_8, 321 SBP2_WORKAROUND_MODE_SENSE_8 |
322 SBP2_WORKAROUND_POWER_CONDITION,
314 }, 323 },
315 /* DViCO Momobay FX-3A with TSB42AA9A bridge */ { 324 /* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
316 .firmware_revision = 0x002800, 325 .firmware_revision = 0x002800,
317 .model = 0x000000, 326 .model = 0x000000,
318 .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY, 327 .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY |
328 SBP2_WORKAROUND_POWER_CONDITION,
319 }, 329 },
320 /* Initio bridges, actually only needed for some older ones */ { 330 /* Initio bridges, actually only needed for some older ones */ {
321 .firmware_revision = 0x000200, 331 .firmware_revision = 0x000200,
322 .model = ~0, 332 .model = ~0,
323 .workarounds = SBP2_WORKAROUND_INQUIRY_36, 333 .workarounds = SBP2_WORKAROUND_INQUIRY_36,
324 }, 334 },
335 /* PL-3507 bridge with Prolific firmware */ {
336 .firmware_revision = 0x012800,
337 .model = ~0,
338 .workarounds = SBP2_WORKAROUND_POWER_CONDITION,
339 },
325 /* Symbios bridge */ { 340 /* Symbios bridge */ {
326 .firmware_revision = 0xa0b800, 341 .firmware_revision = 0xa0b800,
327 .model = ~0, 342 .model = ~0,
@@ -1530,6 +1545,9 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
1530 1545
1531 sdev->use_10_for_rw = 1; 1546 sdev->use_10_for_rw = 1;
1532 1547
1548 if (sbp2_param_exclusive_login)
1549 sdev->manage_start_stop = 1;
1550
1533 if (sdev->type == TYPE_ROM) 1551 if (sdev->type == TYPE_ROM)
1534 sdev->use_10_for_ms = 1; 1552 sdev->use_10_for_ms = 1;
1535 1553
@@ -1540,6 +1558,9 @@ static int sbp2_scsi_slave_configure(struct scsi_device *sdev)
1540 if (lu->tgt->workarounds & SBP2_WORKAROUND_FIX_CAPACITY) 1558 if (lu->tgt->workarounds & SBP2_WORKAROUND_FIX_CAPACITY)
1541 sdev->fix_capacity = 1; 1559 sdev->fix_capacity = 1;
1542 1560
1561 if (lu->tgt->workarounds & SBP2_WORKAROUND_POWER_CONDITION)
1562 sdev->start_stop_pwr_cond = 1;
1563
1543 if (lu->tgt->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS) 1564 if (lu->tgt->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS)
1544 blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512); 1565 blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512);
1545 1566
diff --git a/drivers/firewire/fw-transaction.c b/drivers/firewire/fw-transaction.c
index 03ae8a77c479..40db80752272 100644
--- a/drivers/firewire/fw-transaction.c
+++ b/drivers/firewire/fw-transaction.c
@@ -55,6 +55,9 @@
55#define HEADER_GET_DATA_LENGTH(q) (((q) >> 16) & 0xffff) 55#define HEADER_GET_DATA_LENGTH(q) (((q) >> 16) & 0xffff)
56#define HEADER_GET_EXTENDED_TCODE(q) (((q) >> 0) & 0xffff) 56#define HEADER_GET_EXTENDED_TCODE(q) (((q) >> 0) & 0xffff)
57 57
58#define HEADER_DESTINATION_IS_BROADCAST(q) \
59 (((q) & HEADER_DESTINATION(0x3f)) == HEADER_DESTINATION(0x3f))
60
58#define PHY_CONFIG_GAP_COUNT(gap_count) (((gap_count) << 16) | (1 << 22)) 61#define PHY_CONFIG_GAP_COUNT(gap_count) (((gap_count) << 16) | (1 << 22))
59#define PHY_CONFIG_ROOT_ID(node_id) ((((node_id) & 0x3f) << 24) | (1 << 23)) 62#define PHY_CONFIG_ROOT_ID(node_id) ((((node_id) & 0x3f) << 24) | (1 << 23))
60#define PHY_IDENTIFIER(id) ((id) << 30) 63#define PHY_IDENTIFIER(id) ((id) << 30)
@@ -624,12 +627,9 @@ allocate_request(struct fw_packet *p)
624void 627void
625fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) 628fw_send_response(struct fw_card *card, struct fw_request *request, int rcode)
626{ 629{
627 /* 630 /* unified transaction or broadcast transaction: don't respond */
628 * Broadcast packets are reported as ACK_COMPLETE, so this 631 if (request->ack != ACK_PENDING ||
629 * check is sufficient to ensure we don't send response to 632 HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) {
630 * broadcast packets or posted writes.
631 */
632 if (request->ack != ACK_PENDING) {
633 kfree(request); 633 kfree(request);
634 return; 634 return;
635 } 635 }
@@ -817,12 +817,13 @@ handle_registers(struct fw_card *card, struct fw_request *request,
817 int reg = offset & ~CSR_REGISTER_BASE; 817 int reg = offset & ~CSR_REGISTER_BASE;
818 unsigned long long bus_time; 818 unsigned long long bus_time;
819 __be32 *data = payload; 819 __be32 *data = payload;
820 int rcode = RCODE_COMPLETE;
820 821
821 switch (reg) { 822 switch (reg) {
822 case CSR_CYCLE_TIME: 823 case CSR_CYCLE_TIME:
823 case CSR_BUS_TIME: 824 case CSR_BUS_TIME:
824 if (!TCODE_IS_READ_REQUEST(tcode) || length != 4) { 825 if (!TCODE_IS_READ_REQUEST(tcode) || length != 4) {
825 fw_send_response(card, request, RCODE_TYPE_ERROR); 826 rcode = RCODE_TYPE_ERROR;
826 break; 827 break;
827 } 828 }
828 829
@@ -831,7 +832,17 @@ handle_registers(struct fw_card *card, struct fw_request *request,
831 *data = cpu_to_be32(bus_time); 832 *data = cpu_to_be32(bus_time);
832 else 833 else
833 *data = cpu_to_be32(bus_time >> 25); 834 *data = cpu_to_be32(bus_time >> 25);
834 fw_send_response(card, request, RCODE_COMPLETE); 835 break;
836
837 case CSR_BROADCAST_CHANNEL:
838 if (tcode == TCODE_READ_QUADLET_REQUEST)
839 *data = cpu_to_be32(card->broadcast_channel);
840 else if (tcode == TCODE_WRITE_QUADLET_REQUEST)
841 card->broadcast_channel =
842 (be32_to_cpu(*data) & BROADCAST_CHANNEL_VALID) |
843 BROADCAST_CHANNEL_INITIAL;
844 else
845 rcode = RCODE_TYPE_ERROR;
835 break; 846 break;
836 847
837 case CSR_BUS_MANAGER_ID: 848 case CSR_BUS_MANAGER_ID:
@@ -850,10 +861,13 @@ handle_registers(struct fw_card *card, struct fw_request *request,
850 861
851 case CSR_BUSY_TIMEOUT: 862 case CSR_BUSY_TIMEOUT:
852 /* FIXME: Implement this. */ 863 /* FIXME: Implement this. */
864
853 default: 865 default:
854 fw_send_response(card, request, RCODE_ADDRESS_ERROR); 866 rcode = RCODE_ADDRESS_ERROR;
855 break; 867 break;
856 } 868 }
869
870 fw_send_response(card, request, rcode);
857} 871}
858 872
859static struct fw_address_handler registers = { 873static struct fw_address_handler registers = {
diff --git a/drivers/firewire/fw-transaction.h b/drivers/firewire/fw-transaction.h
index 04d3854f6560..2ae1b0d6cb7b 100644
--- a/drivers/firewire/fw-transaction.h
+++ b/drivers/firewire/fw-transaction.h
@@ -19,14 +19,15 @@
19#ifndef __fw_transaction_h 19#ifndef __fw_transaction_h
20#define __fw_transaction_h 20#define __fw_transaction_h
21 21
22#include <linux/completion.h>
22#include <linux/device.h> 23#include <linux/device.h>
23#include <linux/timer.h>
24#include <linux/interrupt.h>
25#include <linux/list.h>
26#include <linux/fs.h>
27#include <linux/dma-mapping.h> 24#include <linux/dma-mapping.h>
28#include <linux/firewire-constants.h> 25#include <linux/firewire-constants.h>
29#include <asm/atomic.h> 26#include <linux/kref.h>
27#include <linux/list.h>
28#include <linux/spinlock_types.h>
29#include <linux/timer.h>
30#include <linux/workqueue.h>
30 31
31#define TCODE_IS_READ_REQUEST(tcode) (((tcode) & ~1) == 4) 32#define TCODE_IS_READ_REQUEST(tcode) (((tcode) & ~1) == 4)
32#define TCODE_IS_BLOCK_PACKET(tcode) (((tcode) & 1) != 0) 33#define TCODE_IS_BLOCK_PACKET(tcode) (((tcode) & 1) != 0)
@@ -80,6 +81,9 @@
80#define CSR_SPEED_MAP 0x2000 81#define CSR_SPEED_MAP 0x2000
81#define CSR_SPEED_MAP_END 0x3000 82#define CSR_SPEED_MAP_END 0x3000
82 83
84#define BROADCAST_CHANNEL_INITIAL (1 << 31 | 31)
85#define BROADCAST_CHANNEL_VALID (1 << 30)
86
83#define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args) 87#define fw_notify(s, args...) printk(KERN_NOTICE KBUILD_MODNAME ": " s, ## args)
84#define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args) 88#define fw_error(s, args...) printk(KERN_ERR KBUILD_MODNAME ": " s, ## args)
85 89
@@ -216,7 +220,8 @@ extern struct bus_type fw_bus_type;
216struct fw_card { 220struct fw_card {
217 const struct fw_card_driver *driver; 221 const struct fw_card_driver *driver;
218 struct device *device; 222 struct device *device;
219 atomic_t device_count; 223 struct kref kref;
224 struct completion done;
220 225
221 int node_id; 226 int node_id;
222 int generation; 227 int generation;
@@ -236,6 +241,7 @@ struct fw_card {
236 */ 241 */
237 int self_id_count; 242 int self_id_count;
238 u32 topology_map[252 + 3]; 243 u32 topology_map[252 + 3];
244 u32 broadcast_channel;
239 245
240 spinlock_t lock; /* Take this lock when handling the lists in 246 spinlock_t lock; /* Take this lock when handling the lists in
241 * this struct. */ 247 * this struct. */
@@ -256,6 +262,20 @@ struct fw_card {
256 int bm_generation; 262 int bm_generation;
257}; 263};
258 264
265static inline struct fw_card *fw_card_get(struct fw_card *card)
266{
267 kref_get(&card->kref);
268
269 return card;
270}
271
272void fw_card_release(struct kref *kref);
273
274static inline void fw_card_put(struct fw_card *card)
275{
276 kref_put(&card->kref, fw_card_release);
277}
278
259/* 279/*
260 * The iso packet format allows for an immediate header/payload part 280 * The iso packet format allows for an immediate header/payload part
261 * stored in 'header' immediately after the packet info plus an 281 * stored in 'header' immediately after the packet info plus an
@@ -348,8 +368,6 @@ int
348fw_iso_context_stop(struct fw_iso_context *ctx); 368fw_iso_context_stop(struct fw_iso_context *ctx);
349 369
350struct fw_card_driver { 370struct fw_card_driver {
351 const char *name;
352
353 /* 371 /*
354 * Enable the given card with the given initial config rom. 372 * Enable the given card with the given initial config rom.
355 * This function is expected to activate the card, and either 373 * This function is expected to activate the card, and either
diff --git a/drivers/ieee1394/csr1212.c b/drivers/ieee1394/csr1212.c
index e8122def164d..9f95337139e3 100644
--- a/drivers/ieee1394/csr1212.c
+++ b/drivers/ieee1394/csr1212.c
@@ -1049,6 +1049,24 @@ int csr1212_read(struct csr1212_csr *csr, u32 offset, void *buffer, u32 len)
1049 return -ENOENT; 1049 return -ENOENT;
1050} 1050}
1051 1051
1052/*
1053 * Apparently there are many different wrong implementations of the CRC
1054 * algorithm. We don't fail, we just warn... approximately once per GUID.
1055 */
1056static void
1057csr1212_check_crc(const u32 *buffer, size_t length, u16 crc, __be32 *guid)
1058{
1059 static u64 last_bad_eui64;
1060 u64 eui64 = ((u64)be32_to_cpu(guid[0]) << 32) | be32_to_cpu(guid[1]);
1061
1062 if (csr1212_crc16(buffer, length) == crc ||
1063 csr1212_msft_crc16(buffer, length) == crc ||
1064 eui64 == last_bad_eui64)
1065 return;
1066
1067 printk(KERN_DEBUG "ieee1394: config ROM CRC error\n");
1068 last_bad_eui64 = eui64;
1069}
1052 1070
1053/* Parse a chunk of data as a Config ROM */ 1071/* Parse a chunk of data as a Config ROM */
1054 1072
@@ -1092,11 +1110,8 @@ static int csr1212_parse_bus_info_block(struct csr1212_csr *csr)
1092 return ret; 1110 return ret;
1093 } 1111 }
1094 1112
1095 /* Apparently there are many different wrong implementations of the CRC 1113 csr1212_check_crc(bi->data, bi->crc_length, bi->crc,
1096 * algorithm. We don't fail, we just warn. */ 1114 &csr->bus_info_data[3]);
1097 if ((csr1212_crc16(bi->data, bi->crc_length) != bi->crc) &&
1098 (csr1212_msft_crc16(bi->data, bi->crc_length) != bi->crc))
1099 printk(KERN_DEBUG "IEEE 1394 device has ROM CRC error\n");
1100 1115
1101 cr = CSR1212_MALLOC(sizeof(*cr)); 1116 cr = CSR1212_MALLOC(sizeof(*cr));
1102 if (!cr) 1117 if (!cr)
@@ -1205,11 +1220,8 @@ int csr1212_parse_keyval(struct csr1212_keyval *kv,
1205 &cache->data[bytes_to_quads(kv->offset - cache->offset)]; 1220 &cache->data[bytes_to_quads(kv->offset - cache->offset)];
1206 kvi_len = be16_to_cpu(kvi->length); 1221 kvi_len = be16_to_cpu(kvi->length);
1207 1222
1208 /* Apparently there are many different wrong implementations of the CRC 1223 /* GUID is wrong in here in case of extended ROM. We don't care. */
1209 * algorithm. We don't fail, we just warn. */ 1224 csr1212_check_crc(kvi->data, kvi_len, kvi->crc, &cache->data[3]);
1210 if ((csr1212_crc16(kvi->data, kvi_len) != kvi->crc) &&
1211 (csr1212_msft_crc16(kvi->data, kvi_len) != kvi->crc))
1212 printk(KERN_DEBUG "IEEE 1394 device has ROM CRC error\n");
1213 1225
1214 switch (kv->key.type) { 1226 switch (kv->key.type) {
1215 case CSR1212_KV_TYPE_DIRECTORY: 1227 case CSR1212_KV_TYPE_DIRECTORY:
diff --git a/drivers/ieee1394/dma.c b/drivers/ieee1394/dma.c
index 73685e7dc7e4..1aba8c13fe8f 100644
--- a/drivers/ieee1394/dma.c
+++ b/drivers/ieee1394/dma.c
@@ -274,7 +274,7 @@ int dma_region_mmap(struct dma_region *dma, struct file *file,
274 vma->vm_ops = &dma_region_vm_ops; 274 vma->vm_ops = &dma_region_vm_ops;
275 vma->vm_private_data = dma; 275 vma->vm_private_data = dma;
276 vma->vm_file = file; 276 vma->vm_file = file;
277 vma->vm_flags |= VM_RESERVED; 277 vma->vm_flags |= VM_RESERVED | VM_ALWAYSDUMP;
278 278
279 return 0; 279 return 0;
280} 280}
diff --git a/drivers/ieee1394/highlevel.c b/drivers/ieee1394/highlevel.c
index fa2bfec0fca2..918ffc4fc8ac 100644
--- a/drivers/ieee1394/highlevel.c
+++ b/drivers/ieee1394/highlevel.c
@@ -228,10 +228,8 @@ void hpsb_register_highlevel(struct hpsb_highlevel *hl)
228{ 228{
229 unsigned long flags; 229 unsigned long flags;
230 230
231 hpsb_init_highlevel(hl);
231 INIT_LIST_HEAD(&hl->addr_list); 232 INIT_LIST_HEAD(&hl->addr_list);
232 INIT_LIST_HEAD(&hl->host_info_list);
233
234 rwlock_init(&hl->host_info_lock);
235 233
236 down_write(&hl_drivers_sem); 234 down_write(&hl_drivers_sem);
237 list_add_tail(&hl->hl_list, &hl_drivers); 235 list_add_tail(&hl->hl_list, &hl_drivers);
diff --git a/drivers/ieee1394/highlevel.h b/drivers/ieee1394/highlevel.h
index eb9fe321e09a..bc5d0854c17e 100644
--- a/drivers/ieee1394/highlevel.h
+++ b/drivers/ieee1394/highlevel.h
@@ -2,7 +2,7 @@
2#define IEEE1394_HIGHLEVEL_H 2#define IEEE1394_HIGHLEVEL_H
3 3
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/spinlock_types.h> 5#include <linux/spinlock.h>
6#include <linux/types.h> 6#include <linux/types.h>
7 7
8struct module; 8struct module;
@@ -103,6 +103,17 @@ int highlevel_lock64(struct hpsb_host *host, int nodeid, octlet_t *store,
103void highlevel_fcp_request(struct hpsb_host *host, int nodeid, int direction, 103void highlevel_fcp_request(struct hpsb_host *host, int nodeid, int direction,
104 void *data, size_t length); 104 void *data, size_t length);
105 105
106/**
107 * hpsb_init_highlevel - initialize a struct hpsb_highlevel
108 *
109 * This is only necessary if hpsb_get_hostinfo_bykey can be called
110 * before hpsb_register_highlevel.
111 */
112static inline void hpsb_init_highlevel(struct hpsb_highlevel *hl)
113{
114 rwlock_init(&hl->host_info_lock);
115 INIT_LIST_HEAD(&hl->host_info_list);
116}
106void hpsb_register_highlevel(struct hpsb_highlevel *hl); 117void hpsb_register_highlevel(struct hpsb_highlevel *hl);
107void hpsb_unregister_highlevel(struct hpsb_highlevel *hl); 118void hpsb_unregister_highlevel(struct hpsb_highlevel *hl);
108 119
diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c
index ec2a0adbedb2..96f2847b0405 100644
--- a/drivers/ieee1394/raw1394.c
+++ b/drivers/ieee1394/raw1394.c
@@ -2549,8 +2549,8 @@ static int raw1394_mmap(struct file *file, struct vm_area_struct *vma)
2549} 2549}
2550 2550
2551/* ioctl is only used for rawiso operations */ 2551/* ioctl is only used for rawiso operations */
2552static int raw1394_ioctl(struct inode *inode, struct file *file, 2552static long do_raw1394_ioctl(struct file *file, unsigned int cmd,
2553 unsigned int cmd, unsigned long arg) 2553 unsigned long arg)
2554{ 2554{
2555 struct file_info *fi = file->private_data; 2555 struct file_info *fi = file->private_data;
2556 void __user *argp = (void __user *)arg; 2556 void __user *argp = (void __user *)arg;
@@ -2656,6 +2656,16 @@ static int raw1394_ioctl(struct inode *inode, struct file *file,
2656 return -EINVAL; 2656 return -EINVAL;
2657} 2657}
2658 2658
2659static long raw1394_ioctl(struct file *file, unsigned int cmd,
2660 unsigned long arg)
2661{
2662 long ret;
2663 lock_kernel();
2664 ret = do_raw1394_ioctl(file, cmd, arg);
2665 unlock_kernel();
2666 return ret;
2667}
2668
2659#ifdef CONFIG_COMPAT 2669#ifdef CONFIG_COMPAT
2660struct raw1394_iso_packets32 { 2670struct raw1394_iso_packets32 {
2661 __u32 n_packets; 2671 __u32 n_packets;
@@ -2690,7 +2700,7 @@ static long raw1394_iso_xmit_recv_packets32(struct file *file, unsigned int cmd,
2690 !copy_from_user(&infos32, &arg->infos, sizeof infos32)) { 2700 !copy_from_user(&infos32, &arg->infos, sizeof infos32)) {
2691 infos = compat_ptr(infos32); 2701 infos = compat_ptr(infos32);
2692 if (!copy_to_user(&dst->infos, &infos, sizeof infos)) 2702 if (!copy_to_user(&dst->infos, &infos, sizeof infos))
2693 err = raw1394_ioctl(NULL, file, cmd, (unsigned long)dst); 2703 err = do_raw1394_ioctl(file, cmd, (unsigned long)dst);
2694 } 2704 }
2695 return err; 2705 return err;
2696} 2706}
@@ -2731,7 +2741,7 @@ static long raw1394_compat_ioctl(struct file *file,
2731 case RAW1394_IOC_ISO_GET_STATUS: 2741 case RAW1394_IOC_ISO_GET_STATUS:
2732 case RAW1394_IOC_ISO_SHUTDOWN: 2742 case RAW1394_IOC_ISO_SHUTDOWN:
2733 case RAW1394_IOC_ISO_QUEUE_ACTIVITY: 2743 case RAW1394_IOC_ISO_QUEUE_ACTIVITY:
2734 err = raw1394_ioctl(NULL, file, cmd, arg); 2744 err = do_raw1394_ioctl(file, cmd, arg);
2735 break; 2745 break;
2736 /* These request have different format. */ 2746 /* These request have different format. */
2737 case RAW1394_IOC_ISO_RECV_PACKETS32: 2747 case RAW1394_IOC_ISO_RECV_PACKETS32:
@@ -2984,7 +2994,7 @@ static const struct file_operations raw1394_fops = {
2984 .read = raw1394_read, 2994 .read = raw1394_read,
2985 .write = raw1394_write, 2995 .write = raw1394_write,
2986 .mmap = raw1394_mmap, 2996 .mmap = raw1394_mmap,
2987 .ioctl = raw1394_ioctl, 2997 .unlocked_ioctl = raw1394_ioctl,
2988#ifdef CONFIG_COMPAT 2998#ifdef CONFIG_COMPAT
2989 .compat_ioctl = raw1394_compat_ioctl, 2999 .compat_ioctl = raw1394_compat_ioctl,
2990#endif 3000#endif
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index a5ceff287a28..9cbf3154d243 100644
--- a/drivers/ieee1394/sbp2.c
+++ b/drivers/ieee1394/sbp2.c
@@ -186,6 +186,11 @@ MODULE_PARM_DESC(exclusive_login, "Exclusive login to sbp2 device "
186 * - delay inquiry 186 * - delay inquiry
187 * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry. 187 * Wait extra SBP2_INQUIRY_DELAY seconds after login before SCSI inquiry.
188 * 188 *
189 * - power condition
190 * Set the power condition field in the START STOP UNIT commands sent by
191 * sd_mod on suspend, resume, and shutdown (if manage_start_stop is on).
192 * Some disks need this to spin down or to resume properly.
193 *
189 * - override internal blacklist 194 * - override internal blacklist
190 * Instead of adding to the built-in blacklist, use only the workarounds 195 * Instead of adding to the built-in blacklist, use only the workarounds
191 * specified in the module load parameter. 196 * specified in the module load parameter.
@@ -199,6 +204,8 @@ MODULE_PARM_DESC(workarounds, "Work around device bugs (default = 0"
199 ", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8) 204 ", skip mode page 8 = " __stringify(SBP2_WORKAROUND_MODE_SENSE_8)
200 ", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY) 205 ", fix capacity = " __stringify(SBP2_WORKAROUND_FIX_CAPACITY)
201 ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY) 206 ", delay inquiry = " __stringify(SBP2_WORKAROUND_DELAY_INQUIRY)
207 ", set power condition in start stop unit = "
208 __stringify(SBP2_WORKAROUND_POWER_CONDITION)
202 ", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE) 209 ", override internal blacklist = " __stringify(SBP2_WORKAROUND_OVERRIDE)
203 ", or a combination)"); 210 ", or a combination)");
204 211
@@ -359,18 +366,25 @@ static const struct {
359 .firmware_revision = 0x002800, 366 .firmware_revision = 0x002800,
360 .model_id = 0x001010, 367 .model_id = 0x001010,
361 .workarounds = SBP2_WORKAROUND_INQUIRY_36 | 368 .workarounds = SBP2_WORKAROUND_INQUIRY_36 |
362 SBP2_WORKAROUND_MODE_SENSE_8, 369 SBP2_WORKAROUND_MODE_SENSE_8 |
370 SBP2_WORKAROUND_POWER_CONDITION,
363 }, 371 },
364 /* DViCO Momobay FX-3A with TSB42AA9A bridge */ { 372 /* DViCO Momobay FX-3A with TSB42AA9A bridge */ {
365 .firmware_revision = 0x002800, 373 .firmware_revision = 0x002800,
366 .model_id = 0x000000, 374 .model_id = 0x000000,
367 .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY, 375 .workarounds = SBP2_WORKAROUND_DELAY_INQUIRY |
376 SBP2_WORKAROUND_POWER_CONDITION,
368 }, 377 },
369 /* Initio bridges, actually only needed for some older ones */ { 378 /* Initio bridges, actually only needed for some older ones */ {
370 .firmware_revision = 0x000200, 379 .firmware_revision = 0x000200,
371 .model_id = SBP2_ROM_VALUE_WILDCARD, 380 .model_id = SBP2_ROM_VALUE_WILDCARD,
372 .workarounds = SBP2_WORKAROUND_INQUIRY_36, 381 .workarounds = SBP2_WORKAROUND_INQUIRY_36,
373 }, 382 },
383 /* PL-3507 bridge with Prolific firmware */ {
384 .firmware_revision = 0x012800,
385 .model_id = SBP2_ROM_VALUE_WILDCARD,
386 .workarounds = SBP2_WORKAROUND_POWER_CONDITION,
387 },
374 /* Symbios bridge */ { 388 /* Symbios bridge */ {
375 .firmware_revision = 0xa0b800, 389 .firmware_revision = 0xa0b800,
376 .model_id = SBP2_ROM_VALUE_WILDCARD, 390 .model_id = SBP2_ROM_VALUE_WILDCARD,
@@ -1995,6 +2009,8 @@ static int sbp2scsi_slave_configure(struct scsi_device *sdev)
1995 2009
1996 sdev->use_10_for_rw = 1; 2010 sdev->use_10_for_rw = 1;
1997 2011
2012 if (sbp2_exclusive_login)
2013 sdev->manage_start_stop = 1;
1998 if (sdev->type == TYPE_ROM) 2014 if (sdev->type == TYPE_ROM)
1999 sdev->use_10_for_ms = 1; 2015 sdev->use_10_for_ms = 1;
2000 if (sdev->type == TYPE_DISK && 2016 if (sdev->type == TYPE_DISK &&
@@ -2002,6 +2018,8 @@ static int sbp2scsi_slave_configure(struct scsi_device *sdev)
2002 sdev->skip_ms_page_8 = 1; 2018 sdev->skip_ms_page_8 = 1;
2003 if (lu->workarounds & SBP2_WORKAROUND_FIX_CAPACITY) 2019 if (lu->workarounds & SBP2_WORKAROUND_FIX_CAPACITY)
2004 sdev->fix_capacity = 1; 2020 sdev->fix_capacity = 1;
2021 if (lu->workarounds & SBP2_WORKAROUND_POWER_CONDITION)
2022 sdev->start_stop_pwr_cond = 1;
2005 if (lu->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS) 2023 if (lu->workarounds & SBP2_WORKAROUND_128K_MAX_TRANS)
2006 blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512); 2024 blk_queue_max_sectors(sdev->request_queue, 128 * 1024 / 512);
2007 return 0; 2025 return 0;
diff --git a/drivers/ieee1394/sbp2.h b/drivers/ieee1394/sbp2.h
index 80d8e097b065..875428bc8d29 100644
--- a/drivers/ieee1394/sbp2.h
+++ b/drivers/ieee1394/sbp2.h
@@ -345,6 +345,7 @@ enum sbp2lu_state_types {
345#define SBP2_WORKAROUND_FIX_CAPACITY 0x8 345#define SBP2_WORKAROUND_FIX_CAPACITY 0x8
346#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10 346#define SBP2_WORKAROUND_DELAY_INQUIRY 0x10
347#define SBP2_INQUIRY_DELAY 12 347#define SBP2_INQUIRY_DELAY 12
348#define SBP2_WORKAROUND_POWER_CONDITION 0x20
348#define SBP2_WORKAROUND_OVERRIDE 0x100 349#define SBP2_WORKAROUND_OVERRIDE 0x100
349 350
350#endif /* SBP2_H */ 351#endif /* SBP2_H */
diff --git a/drivers/ieee1394/video1394.c b/drivers/ieee1394/video1394.c
index e24772d336e1..069b9f6bf16d 100644
--- a/drivers/ieee1394/video1394.c
+++ b/drivers/ieee1394/video1394.c
@@ -1503,6 +1503,8 @@ static int __init video1394_init_module (void)
1503{ 1503{
1504 int ret; 1504 int ret;
1505 1505
1506 hpsb_init_highlevel(&video1394_highlevel);
1507
1506 cdev_init(&video1394_cdev, &video1394_fops); 1508 cdev_init(&video1394_cdev, &video1394_fops);
1507 video1394_cdev.owner = THIS_MODULE; 1509 video1394_cdev.owner = THIS_MODULE;
1508 ret = cdev_add(&video1394_cdev, IEEE1394_VIDEO1394_DEV, 16); 1510 ret = cdev_add(&video1394_cdev, IEEE1394_VIDEO1394_DEV, 16);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 9e23ab0b51a1..55c718828826 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -35,6 +35,7 @@
35#include <rdma/ib_user_verbs.h> 35#include <rdma/ib_user_verbs.h>
36#include <linux/io.h> 36#include <linux/io.h>
37#include <linux/utsname.h> 37#include <linux/utsname.h>
38#include <linux/rculist.h>
38 39
39#include "ipath_kernel.h" 40#include "ipath_kernel.h"
40#include "ipath_verbs.h" 41#include "ipath_verbs.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 9e5abf9c309d..d73e32232879 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -31,8 +31,7 @@
31 * SOFTWARE. 31 * SOFTWARE.
32 */ 32 */
33 33
34#include <linux/list.h> 34#include <linux/rculist.h>
35#include <linux/rcupdate.h>
36 35
37#include "ipath_verbs.h" 36#include "ipath_verbs.h"
38 37
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index c36a03ae9bfb..860d75d81f82 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -20,7 +20,7 @@
20#include <linux/errno.h> 20#include <linux/errno.h>
21#include <linux/slab.h> 21#include <linux/slab.h>
22#include <linux/string.h> 22#include <linux/string.h>
23#include <linux/list.h> 23#include <linux/rculist.h>
24#include <linux/notifier.h> 24#include <linux/notifier.h>
25#include <linux/netdevice.h> 25#include <linux/netdevice.h>
26#include <linux/etherdevice.h> 26#include <linux/etherdevice.h>
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 01cefbb2d539..d53312c42547 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1124,6 +1124,8 @@ sd_spinup_disk(struct scsi_disk *sdkp)
1124 cmd[1] = 1; /* Return immediately */ 1124 cmd[1] = 1; /* Return immediately */
1125 memset((void *) &cmd[2], 0, 8); 1125 memset((void *) &cmd[2], 0, 8);
1126 cmd[4] = 1; /* Start spin cycle */ 1126 cmd[4] = 1; /* Start spin cycle */
1127 if (sdkp->device->start_stop_pwr_cond)
1128 cmd[4] |= 1 << 4;
1127 scsi_execute_req(sdkp->device, cmd, DMA_NONE, 1129 scsi_execute_req(sdkp->device, cmd, DMA_NONE,
1128 NULL, 0, &sshdr, 1130 NULL, 0, &sshdr,
1129 SD_TIMEOUT, SD_MAX_RETRIES); 1131 SD_TIMEOUT, SD_MAX_RETRIES);
@@ -1790,6 +1792,9 @@ static int sd_start_stop_device(struct scsi_disk *sdkp, int start)
1790 if (start) 1792 if (start)
1791 cmd[4] |= 1; /* START */ 1793 cmd[4] |= 1; /* START */
1792 1794
1795 if (sdp->start_stop_pwr_cond)
1796 cmd[4] |= start ? 1 << 4 : 3 << 4; /* Active or Standby */
1797
1793 if (!scsi_device_online(sdp)) 1798 if (!scsi_device_online(sdp))
1794 return -ENODEV; 1799 return -ENODEV;
1795 1800
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 4fce3db2cecc..ef87f889ef62 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -195,7 +195,6 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd)
195 } 195 }
196 return 0; 196 return 0;
197} 197}
198#endif /* CONFIG_MMU */
199 198
200static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm, 199static inline pte_t __ptep_modify_prot_start(struct mm_struct *mm,
201 unsigned long addr, 200 unsigned long addr,
@@ -253,6 +252,7 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
253 __ptep_modify_prot_commit(mm, addr, ptep, pte); 252 __ptep_modify_prot_commit(mm, addr, ptep, pte);
254} 253}
255#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */ 254#endif /* __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION */
255#endif /* CONFIG_MMU */
256 256
257/* 257/*
258 * A facility to provide lazy MMU batching. This allows PTE updates and 258 * A facility to provide lazy MMU batching. This allows PTE updates and
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index d982eb89c77d..98202c672fde 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -3,6 +3,7 @@
3 3
4#include <asm/atomic.h> 4#include <asm/atomic.h>
5#include <linux/list.h> 5#include <linux/list.h>
6#include <linux/rculist.h>
6#include <linux/spinlock.h> 7#include <linux/spinlock.h>
7#include <linux/cache.h> 8#include <linux/cache.h>
8#include <linux/rcupdate.h> 9#include <linux/rcupdate.h>
diff --git a/include/linux/list.h b/include/linux/list.h
index 08cf4f651889..139ec41d9c2e 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -85,65 +85,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head)
85} 85}
86 86
87/* 87/*
88 * Insert a new entry between two known consecutive entries.
89 *
90 * This is only for internal list manipulation where we know
91 * the prev/next entries already!
92 */
93static inline void __list_add_rcu(struct list_head * new,
94 struct list_head * prev, struct list_head * next)
95{
96 new->next = next;
97 new->prev = prev;
98 smp_wmb();
99 next->prev = new;
100 prev->next = new;
101}
102
103/**
104 * list_add_rcu - add a new entry to rcu-protected list
105 * @new: new entry to be added
106 * @head: list head to add it after
107 *
108 * Insert a new entry after the specified head.
109 * This is good for implementing stacks.
110 *
111 * The caller must take whatever precautions are necessary
112 * (such as holding appropriate locks) to avoid racing
113 * with another list-mutation primitive, such as list_add_rcu()
114 * or list_del_rcu(), running on this same list.
115 * However, it is perfectly legal to run concurrently with
116 * the _rcu list-traversal primitives, such as
117 * list_for_each_entry_rcu().
118 */
119static inline void list_add_rcu(struct list_head *new, struct list_head *head)
120{
121 __list_add_rcu(new, head, head->next);
122}
123
124/**
125 * list_add_tail_rcu - add a new entry to rcu-protected list
126 * @new: new entry to be added
127 * @head: list head to add it before
128 *
129 * Insert a new entry before the specified head.
130 * This is useful for implementing queues.
131 *
132 * The caller must take whatever precautions are necessary
133 * (such as holding appropriate locks) to avoid racing
134 * with another list-mutation primitive, such as list_add_tail_rcu()
135 * or list_del_rcu(), running on this same list.
136 * However, it is perfectly legal to run concurrently with
137 * the _rcu list-traversal primitives, such as
138 * list_for_each_entry_rcu().
139 */
140static inline void list_add_tail_rcu(struct list_head *new,
141 struct list_head *head)
142{
143 __list_add_rcu(new, head->prev, head);
144}
145
146/*
147 * Delete a list entry by making the prev/next entries 88 * Delete a list entry by making the prev/next entries
148 * point to each other. 89 * point to each other.
149 * 90 *
@@ -174,36 +115,6 @@ extern void list_del(struct list_head *entry);
174#endif 115#endif
175 116
176/** 117/**
177 * list_del_rcu - deletes entry from list without re-initialization
178 * @entry: the element to delete from the list.
179 *
180 * Note: list_empty() on entry does not return true after this,
181 * the entry is in an undefined state. It is useful for RCU based
182 * lockfree traversal.
183 *
184 * In particular, it means that we can not poison the forward
185 * pointers that may still be used for walking the list.
186 *
187 * The caller must take whatever precautions are necessary
188 * (such as holding appropriate locks) to avoid racing
189 * with another list-mutation primitive, such as list_del_rcu()
190 * or list_add_rcu(), running on this same list.
191 * However, it is perfectly legal to run concurrently with
192 * the _rcu list-traversal primitives, such as
193 * list_for_each_entry_rcu().
194 *
195 * Note that the caller is not permitted to immediately free
196 * the newly deleted entry. Instead, either synchronize_rcu()
197 * or call_rcu() must be used to defer freeing until an RCU
198 * grace period has elapsed.
199 */
200static inline void list_del_rcu(struct list_head *entry)
201{
202 __list_del(entry->prev, entry->next);
203 entry->prev = LIST_POISON2;
204}
205
206/**
207 * list_replace - replace old entry by new one 118 * list_replace - replace old entry by new one
208 * @old : the element to be replaced 119 * @old : the element to be replaced
209 * @new : the new element to insert 120 * @new : the new element to insert
@@ -227,25 +138,6 @@ static inline void list_replace_init(struct list_head *old,
227} 138}
228 139
229/** 140/**
230 * list_replace_rcu - replace old entry by new one
231 * @old : the element to be replaced
232 * @new : the new element to insert
233 *
234 * The @old entry will be replaced with the @new entry atomically.
235 * Note: @old should not be empty.
236 */
237static inline void list_replace_rcu(struct list_head *old,
238 struct list_head *new)
239{
240 new->next = old->next;
241 new->prev = old->prev;
242 smp_wmb();
243 new->next->prev = new;
244 new->prev->next = new;
245 old->prev = LIST_POISON2;
246}
247
248/**
249 * list_del_init - deletes entry from list and reinitialize it. 141 * list_del_init - deletes entry from list and reinitialize it.
250 * @entry: the element to delete from the list. 142 * @entry: the element to delete from the list.
251 */ 143 */
@@ -369,62 +261,6 @@ static inline void list_splice_init(struct list_head *list,
369} 261}
370 262
371/** 263/**
372 * list_splice_init_rcu - splice an RCU-protected list into an existing list.
373 * @list: the RCU-protected list to splice
374 * @head: the place in the list to splice the first list into
375 * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
376 *
377 * @head can be RCU-read traversed concurrently with this function.
378 *
379 * Note that this function blocks.
380 *
381 * Important note: the caller must take whatever action is necessary to
382 * prevent any other updates to @head. In principle, it is possible
383 * to modify the list as soon as sync() begins execution.
384 * If this sort of thing becomes necessary, an alternative version
385 * based on call_rcu() could be created. But only if -really-
386 * needed -- there is no shortage of RCU API members.
387 */
388static inline void list_splice_init_rcu(struct list_head *list,
389 struct list_head *head,
390 void (*sync)(void))
391{
392 struct list_head *first = list->next;
393 struct list_head *last = list->prev;
394 struct list_head *at = head->next;
395
396 if (list_empty(head))
397 return;
398
399 /* "first" and "last" tracking list, so initialize it. */
400
401 INIT_LIST_HEAD(list);
402
403 /*
404 * At this point, the list body still points to the source list.
405 * Wait for any readers to finish using the list before splicing
406 * the list body into the new list. Any new readers will see
407 * an empty list.
408 */
409
410 sync();
411
412 /*
413 * Readers are finished with the source list, so perform splice.
414 * The order is important if the new list is global and accessible
415 * to concurrent RCU readers. Note that RCU readers are not
416 * permitted to traverse the prev pointers without excluding
417 * this function.
418 */
419
420 last->next = at;
421 smp_wmb();
422 head->next = first;
423 first->prev = head;
424 at->prev = last;
425}
426
427/**
428 * list_entry - get the struct for this entry 264 * list_entry - get the struct for this entry
429 * @ptr: the &struct list_head pointer. 265 * @ptr: the &struct list_head pointer.
430 * @type: the type of the struct this is embedded in. 266 * @type: the type of the struct this is embedded in.
@@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
629 &pos->member != (head); \ 465 &pos->member != (head); \
630 pos = n, n = list_entry(n->member.prev, typeof(*n), member)) 466 pos = n, n = list_entry(n->member.prev, typeof(*n), member))
631 467
632/**
633 * list_for_each_rcu - iterate over an rcu-protected list
634 * @pos: the &struct list_head to use as a loop cursor.
635 * @head: the head for your list.
636 *
637 * This list-traversal primitive may safely run concurrently with
638 * the _rcu list-mutation primitives such as list_add_rcu()
639 * as long as the traversal is guarded by rcu_read_lock().
640 */
641#define list_for_each_rcu(pos, head) \
642 for (pos = rcu_dereference((head)->next); \
643 prefetch(pos->next), pos != (head); \
644 pos = rcu_dereference(pos->next))
645
646#define __list_for_each_rcu(pos, head) \
647 for (pos = rcu_dereference((head)->next); \
648 pos != (head); \
649 pos = rcu_dereference(pos->next))
650
651/**
652 * list_for_each_entry_rcu - iterate over rcu list of given type
653 * @pos: the type * to use as a loop cursor.
654 * @head: the head for your list.
655 * @member: the name of the list_struct within the struct.
656 *
657 * This list-traversal primitive may safely run concurrently with
658 * the _rcu list-mutation primitives such as list_add_rcu()
659 * as long as the traversal is guarded by rcu_read_lock().
660 */
661#define list_for_each_entry_rcu(pos, head, member) \
662 for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
663 prefetch(pos->member.next), &pos->member != (head); \
664 pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
665
666
667/**
668 * list_for_each_continue_rcu
669 * @pos: the &struct list_head to use as a loop cursor.
670 * @head: the head for your list.
671 *
672 * Iterate over an rcu-protected list, continuing after current point.
673 *
674 * This list-traversal primitive may safely run concurrently with
675 * the _rcu list-mutation primitives such as list_add_rcu()
676 * as long as the traversal is guarded by rcu_read_lock().
677 */
678#define list_for_each_continue_rcu(pos, head) \
679 for ((pos) = rcu_dereference((pos)->next); \
680 prefetch((pos)->next), (pos) != (head); \
681 (pos) = rcu_dereference((pos)->next))
682
683/* 468/*
684 * Double linked lists with a single pointer list head. 469 * Double linked lists with a single pointer list head.
685 * Mostly useful for hash tables where the two pointer list head is 470 * Mostly useful for hash tables where the two pointer list head is
@@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n)
730 n->pprev = LIST_POISON2; 515 n->pprev = LIST_POISON2;
731} 516}
732 517
733/**
734 * hlist_del_rcu - deletes entry from hash list without re-initialization
735 * @n: the element to delete from the hash list.
736 *
737 * Note: list_unhashed() on entry does not return true after this,
738 * the entry is in an undefined state. It is useful for RCU based
739 * lockfree traversal.
740 *
741 * In particular, it means that we can not poison the forward
742 * pointers that may still be used for walking the hash list.
743 *
744 * The caller must take whatever precautions are necessary
745 * (such as holding appropriate locks) to avoid racing
746 * with another list-mutation primitive, such as hlist_add_head_rcu()
747 * or hlist_del_rcu(), running on this same list.
748 * However, it is perfectly legal to run concurrently with
749 * the _rcu list-traversal primitives, such as
750 * hlist_for_each_entry().
751 */
752static inline void hlist_del_rcu(struct hlist_node *n)
753{
754 __hlist_del(n);
755 n->pprev = LIST_POISON2;
756}
757
758static inline void hlist_del_init(struct hlist_node *n) 518static inline void hlist_del_init(struct hlist_node *n)
759{ 519{
760 if (!hlist_unhashed(n)) { 520 if (!hlist_unhashed(n)) {
@@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n)
763 } 523 }
764} 524}
765 525
766/**
767 * hlist_replace_rcu - replace old entry by new one
768 * @old : the element to be replaced
769 * @new : the new element to insert
770 *
771 * The @old entry will be replaced with the @new entry atomically.
772 */
773static inline void hlist_replace_rcu(struct hlist_node *old,
774 struct hlist_node *new)
775{
776 struct hlist_node *next = old->next;
777
778 new->next = next;
779 new->pprev = old->pprev;
780 smp_wmb();
781 if (next)
782 new->next->pprev = &new->next;
783 *new->pprev = new;
784 old->pprev = LIST_POISON2;
785}
786
787static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) 526static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
788{ 527{
789 struct hlist_node *first = h->first; 528 struct hlist_node *first = h->first;
@@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
794 n->pprev = &h->first; 533 n->pprev = &h->first;
795} 534}
796 535
797
798/**
799 * hlist_add_head_rcu
800 * @n: the element to add to the hash list.
801 * @h: the list to add to.
802 *
803 * Description:
804 * Adds the specified element to the specified hlist,
805 * while permitting racing traversals.
806 *
807 * The caller must take whatever precautions are necessary
808 * (such as holding appropriate locks) to avoid racing
809 * with another list-mutation primitive, such as hlist_add_head_rcu()
810 * or hlist_del_rcu(), running on this same list.
811 * However, it is perfectly legal to run concurrently with
812 * the _rcu list-traversal primitives, such as
813 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
814 * problems on Alpha CPUs. Regardless of the type of CPU, the
815 * list-traversal primitive must be guarded by rcu_read_lock().
816 */
817static inline void hlist_add_head_rcu(struct hlist_node *n,
818 struct hlist_head *h)
819{
820 struct hlist_node *first = h->first;
821 n->next = first;
822 n->pprev = &h->first;
823 smp_wmb();
824 if (first)
825 first->pprev = &n->next;
826 h->first = n;
827}
828
829/* next must be != NULL */ 536/* next must be != NULL */
830static inline void hlist_add_before(struct hlist_node *n, 537static inline void hlist_add_before(struct hlist_node *n,
831 struct hlist_node *next) 538 struct hlist_node *next)
@@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n,
847 next->next->pprev = &next->next; 554 next->next->pprev = &next->next;
848} 555}
849 556
850/**
851 * hlist_add_before_rcu
852 * @n: the new element to add to the hash list.
853 * @next: the existing element to add the new element before.
854 *
855 * Description:
856 * Adds the specified element to the specified hlist
857 * before the specified node while permitting racing traversals.
858 *
859 * The caller must take whatever precautions are necessary
860 * (such as holding appropriate locks) to avoid racing
861 * with another list-mutation primitive, such as hlist_add_head_rcu()
862 * or hlist_del_rcu(), running on this same list.
863 * However, it is perfectly legal to run concurrently with
864 * the _rcu list-traversal primitives, such as
865 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
866 * problems on Alpha CPUs.
867 */
868static inline void hlist_add_before_rcu(struct hlist_node *n,
869 struct hlist_node *next)
870{
871 n->pprev = next->pprev;
872 n->next = next;
873 smp_wmb();
874 next->pprev = &n->next;
875 *(n->pprev) = n;
876}
877
878/**
879 * hlist_add_after_rcu
880 * @prev: the existing element to add the new element after.
881 * @n: the new element to add to the hash list.
882 *
883 * Description:
884 * Adds the specified element to the specified hlist
885 * after the specified node while permitting racing traversals.
886 *
887 * The caller must take whatever precautions are necessary
888 * (such as holding appropriate locks) to avoid racing
889 * with another list-mutation primitive, such as hlist_add_head_rcu()
890 * or hlist_del_rcu(), running on this same list.
891 * However, it is perfectly legal to run concurrently with
892 * the _rcu list-traversal primitives, such as
893 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
894 * problems on Alpha CPUs.
895 */
896static inline void hlist_add_after_rcu(struct hlist_node *prev,
897 struct hlist_node *n)
898{
899 n->next = prev->next;
900 n->pprev = &prev->next;
901 smp_wmb();
902 prev->next = n;
903 if (n->next)
904 n->next->pprev = &n->next;
905}
906
907#define hlist_entry(ptr, type, member) container_of(ptr,type,member) 557#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
908 558
909#define hlist_for_each(pos, head) \ 559#define hlist_for_each(pos, head) \
@@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
964 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 614 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
965 pos = n) 615 pos = n)
966 616
967/**
968 * hlist_for_each_entry_rcu - iterate over rcu list of given type
969 * @tpos: the type * to use as a loop cursor.
970 * @pos: the &struct hlist_node to use as a loop cursor.
971 * @head: the head for your list.
972 * @member: the name of the hlist_node within the struct.
973 *
974 * This list-traversal primitive may safely run concurrently with
975 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
976 * as long as the traversal is guarded by rcu_read_lock().
977 */
978#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
979 for (pos = rcu_dereference((head)->first); \
980 pos && ({ prefetch(pos->next); 1;}) && \
981 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
982 pos = rcu_dereference(pos->next))
983
984#endif 617#endif
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index b3aa05baab8a..8c774905dcfe 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -151,7 +151,10 @@ extern struct lockdep_map rcu_lock_map;
151 151
152#define __synchronize_sched() synchronize_rcu() 152#define __synchronize_sched() synchronize_rcu()
153 153
154#define call_rcu_sched(head, func) call_rcu(head, func)
155
154extern void __rcu_init(void); 156extern void __rcu_init(void);
157#define rcu_init_sched() do { } while (0)
155extern void rcu_check_callbacks(int cpu, int user); 158extern void rcu_check_callbacks(int cpu, int user);
156extern void rcu_restart_cpu(int cpu); 159extern void rcu_restart_cpu(int cpu);
157 160
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index bde4586f4382..b0f39be08b6c 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -1,6 +1,373 @@
1#ifndef _LINUX_RCULIST_H 1#ifndef _LINUX_RCULIST_H
2#define _LINUX_RCULIST_H 2#define _LINUX_RCULIST_H
3 3
4#ifdef __KERNEL__
5
6/*
7 * RCU-protected list version
8 */
4#include <linux/list.h> 9#include <linux/list.h>
10#include <linux/rcupdate.h>
11
12/*
13 * Insert a new entry between two known consecutive entries.
14 *
15 * This is only for internal list manipulation where we know
16 * the prev/next entries already!
17 */
18static inline void __list_add_rcu(struct list_head *new,
19 struct list_head *prev, struct list_head *next)
20{
21 new->next = next;
22 new->prev = prev;
23 rcu_assign_pointer(prev->next, new);
24 next->prev = new;
25}
26
27/**
28 * list_add_rcu - add a new entry to rcu-protected list
29 * @new: new entry to be added
30 * @head: list head to add it after
31 *
32 * Insert a new entry after the specified head.
33 * This is good for implementing stacks.
34 *
35 * The caller must take whatever precautions are necessary
36 * (such as holding appropriate locks) to avoid racing
37 * with another list-mutation primitive, such as list_add_rcu()
38 * or list_del_rcu(), running on this same list.
39 * However, it is perfectly legal to run concurrently with
40 * the _rcu list-traversal primitives, such as
41 * list_for_each_entry_rcu().
42 */
43static inline void list_add_rcu(struct list_head *new, struct list_head *head)
44{
45 __list_add_rcu(new, head, head->next);
46}
47
48/**
49 * list_add_tail_rcu - add a new entry to rcu-protected list
50 * @new: new entry to be added
51 * @head: list head to add it before
52 *
53 * Insert a new entry before the specified head.
54 * This is useful for implementing queues.
55 *
56 * The caller must take whatever precautions are necessary
57 * (such as holding appropriate locks) to avoid racing
58 * with another list-mutation primitive, such as list_add_tail_rcu()
59 * or list_del_rcu(), running on this same list.
60 * However, it is perfectly legal to run concurrently with
61 * the _rcu list-traversal primitives, such as
62 * list_for_each_entry_rcu().
63 */
64static inline void list_add_tail_rcu(struct list_head *new,
65 struct list_head *head)
66{
67 __list_add_rcu(new, head->prev, head);
68}
69
70/**
71 * list_del_rcu - deletes entry from list without re-initialization
72 * @entry: the element to delete from the list.
73 *
74 * Note: list_empty() on entry does not return true after this,
75 * the entry is in an undefined state. It is useful for RCU based
76 * lockfree traversal.
77 *
78 * In particular, it means that we can not poison the forward
79 * pointers that may still be used for walking the list.
80 *
81 * The caller must take whatever precautions are necessary
82 * (such as holding appropriate locks) to avoid racing
83 * with another list-mutation primitive, such as list_del_rcu()
84 * or list_add_rcu(), running on this same list.
85 * However, it is perfectly legal to run concurrently with
86 * the _rcu list-traversal primitives, such as
87 * list_for_each_entry_rcu().
88 *
89 * Note that the caller is not permitted to immediately free
90 * the newly deleted entry. Instead, either synchronize_rcu()
91 * or call_rcu() must be used to defer freeing until an RCU
92 * grace period has elapsed.
93 */
94static inline void list_del_rcu(struct list_head *entry)
95{
96 __list_del(entry->prev, entry->next);
97 entry->prev = LIST_POISON2;
98}
99
100/**
101 * list_replace_rcu - replace old entry by new one
102 * @old : the element to be replaced
103 * @new : the new element to insert
104 *
105 * The @old entry will be replaced with the @new entry atomically.
106 * Note: @old should not be empty.
107 */
108static inline void list_replace_rcu(struct list_head *old,
109 struct list_head *new)
110{
111 new->next = old->next;
112 new->prev = old->prev;
113 rcu_assign_pointer(new->prev->next, new);
114 new->next->prev = new;
115 old->prev = LIST_POISON2;
116}
117
118/**
119 * list_splice_init_rcu - splice an RCU-protected list into an existing list.
120 * @list: the RCU-protected list to splice
121 * @head: the place in the list to splice the first list into
122 * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
123 *
124 * @head can be RCU-read traversed concurrently with this function.
125 *
126 * Note that this function blocks.
127 *
128 * Important note: the caller must take whatever action is necessary to
129 * prevent any other updates to @head. In principle, it is possible
130 * to modify the list as soon as sync() begins execution.
131 * If this sort of thing becomes necessary, an alternative version
132 * based on call_rcu() could be created. But only if -really-
133 * needed -- there is no shortage of RCU API members.
134 */
135static inline void list_splice_init_rcu(struct list_head *list,
136 struct list_head *head,
137 void (*sync)(void))
138{
139 struct list_head *first = list->next;
140 struct list_head *last = list->prev;
141 struct list_head *at = head->next;
142
143 if (list_empty(head))
144 return;
145
146 /* "first" and "last" tracking list, so initialize it. */
147
148 INIT_LIST_HEAD(list);
149
150 /*
151 * At this point, the list body still points to the source list.
152 * Wait for any readers to finish using the list before splicing
153 * the list body into the new list. Any new readers will see
154 * an empty list.
155 */
156
157 sync();
158
159 /*
160 * Readers are finished with the source list, so perform splice.
161 * The order is important if the new list is global and accessible
162 * to concurrent RCU readers. Note that RCU readers are not
163 * permitted to traverse the prev pointers without excluding
164 * this function.
165 */
166
167 last->next = at;
168 rcu_assign_pointer(head->next, first);
169 first->prev = head;
170 at->prev = last;
171}
172
173/**
174 * list_for_each_rcu - iterate over an rcu-protected list
175 * @pos: the &struct list_head to use as a loop cursor.
176 * @head: the head for your list.
177 *
178 * This list-traversal primitive may safely run concurrently with
179 * the _rcu list-mutation primitives such as list_add_rcu()
180 * as long as the traversal is guarded by rcu_read_lock().
181 */
182#define list_for_each_rcu(pos, head) \
183 for (pos = rcu_dereference((head)->next); \
184 prefetch(pos->next), pos != (head); \
185 pos = rcu_dereference(pos->next))
186
187#define __list_for_each_rcu(pos, head) \
188 for (pos = rcu_dereference((head)->next); \
189 pos != (head); \
190 pos = rcu_dereference(pos->next))
191
192/**
193 * list_for_each_entry_rcu - iterate over rcu list of given type
194 * @pos: the type * to use as a loop cursor.
195 * @head: the head for your list.
196 * @member: the name of the list_struct within the struct.
197 *
198 * This list-traversal primitive may safely run concurrently with
199 * the _rcu list-mutation primitives such as list_add_rcu()
200 * as long as the traversal is guarded by rcu_read_lock().
201 */
202#define list_for_each_entry_rcu(pos, head, member) \
203 for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
204 prefetch(pos->member.next), &pos->member != (head); \
205 pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
206
207
208/**
209 * list_for_each_continue_rcu
210 * @pos: the &struct list_head to use as a loop cursor.
211 * @head: the head for your list.
212 *
213 * Iterate over an rcu-protected list, continuing after current point.
214 *
215 * This list-traversal primitive may safely run concurrently with
216 * the _rcu list-mutation primitives such as list_add_rcu()
217 * as long as the traversal is guarded by rcu_read_lock().
218 */
219#define list_for_each_continue_rcu(pos, head) \
220 for ((pos) = rcu_dereference((pos)->next); \
221 prefetch((pos)->next), (pos) != (head); \
222 (pos) = rcu_dereference((pos)->next))
223
224/**
225 * hlist_del_rcu - deletes entry from hash list without re-initialization
226 * @n: the element to delete from the hash list.
227 *
228 * Note: list_unhashed() on entry does not return true after this,
229 * the entry is in an undefined state. It is useful for RCU based
230 * lockfree traversal.
231 *
232 * In particular, it means that we can not poison the forward
233 * pointers that may still be used for walking the hash list.
234 *
235 * The caller must take whatever precautions are necessary
236 * (such as holding appropriate locks) to avoid racing
237 * with another list-mutation primitive, such as hlist_add_head_rcu()
238 * or hlist_del_rcu(), running on this same list.
239 * However, it is perfectly legal to run concurrently with
240 * the _rcu list-traversal primitives, such as
241 * hlist_for_each_entry().
242 */
243static inline void hlist_del_rcu(struct hlist_node *n)
244{
245 __hlist_del(n);
246 n->pprev = LIST_POISON2;
247}
248
249/**
250 * hlist_replace_rcu - replace old entry by new one
251 * @old : the element to be replaced
252 * @new : the new element to insert
253 *
254 * The @old entry will be replaced with the @new entry atomically.
255 */
256static inline void hlist_replace_rcu(struct hlist_node *old,
257 struct hlist_node *new)
258{
259 struct hlist_node *next = old->next;
260
261 new->next = next;
262 new->pprev = old->pprev;
263 rcu_assign_pointer(*new->pprev, new);
264 if (next)
265 new->next->pprev = &new->next;
266 old->pprev = LIST_POISON2;
267}
268
269/**
270 * hlist_add_head_rcu
271 * @n: the element to add to the hash list.
272 * @h: the list to add to.
273 *
274 * Description:
275 * Adds the specified element to the specified hlist,
276 * while permitting racing traversals.
277 *
278 * The caller must take whatever precautions are necessary
279 * (such as holding appropriate locks) to avoid racing
280 * with another list-mutation primitive, such as hlist_add_head_rcu()
281 * or hlist_del_rcu(), running on this same list.
282 * However, it is perfectly legal to run concurrently with
283 * the _rcu list-traversal primitives, such as
284 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
285 * problems on Alpha CPUs. Regardless of the type of CPU, the
286 * list-traversal primitive must be guarded by rcu_read_lock().
287 */
288static inline void hlist_add_head_rcu(struct hlist_node *n,
289 struct hlist_head *h)
290{
291 struct hlist_node *first = h->first;
292
293 n->next = first;
294 n->pprev = &h->first;
295 rcu_assign_pointer(h->first, n);
296 if (first)
297 first->pprev = &n->next;
298}
299
300/**
301 * hlist_add_before_rcu
302 * @n: the new element to add to the hash list.
303 * @next: the existing element to add the new element before.
304 *
305 * Description:
306 * Adds the specified element to the specified hlist
307 * before the specified node while permitting racing traversals.
308 *
309 * The caller must take whatever precautions are necessary
310 * (such as holding appropriate locks) to avoid racing
311 * with another list-mutation primitive, such as hlist_add_head_rcu()
312 * or hlist_del_rcu(), running on this same list.
313 * However, it is perfectly legal to run concurrently with
314 * the _rcu list-traversal primitives, such as
315 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
316 * problems on Alpha CPUs.
317 */
318static inline void hlist_add_before_rcu(struct hlist_node *n,
319 struct hlist_node *next)
320{
321 n->pprev = next->pprev;
322 n->next = next;
323 rcu_assign_pointer(*(n->pprev), n);
324 next->pprev = &n->next;
325}
326
327/**
328 * hlist_add_after_rcu
329 * @prev: the existing element to add the new element after.
330 * @n: the new element to add to the hash list.
331 *
332 * Description:
333 * Adds the specified element to the specified hlist
334 * after the specified node while permitting racing traversals.
335 *
336 * The caller must take whatever precautions are necessary
337 * (such as holding appropriate locks) to avoid racing
338 * with another list-mutation primitive, such as hlist_add_head_rcu()
339 * or hlist_del_rcu(), running on this same list.
340 * However, it is perfectly legal to run concurrently with
341 * the _rcu list-traversal primitives, such as
342 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
343 * problems on Alpha CPUs.
344 */
345static inline void hlist_add_after_rcu(struct hlist_node *prev,
346 struct hlist_node *n)
347{
348 n->next = prev->next;
349 n->pprev = &prev->next;
350 rcu_assign_pointer(prev->next, n);
351 if (n->next)
352 n->next->pprev = &n->next;
353}
354
355/**
356 * hlist_for_each_entry_rcu - iterate over rcu list of given type
357 * @tpos: the type * to use as a loop cursor.
358 * @pos: the &struct hlist_node to use as a loop cursor.
359 * @head: the head for your list.
360 * @member: the name of the hlist_node within the struct.
361 *
362 * This list-traversal primitive may safely run concurrently with
363 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
364 * as long as the traversal is guarded by rcu_read_lock().
365 */
366#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
367 for (pos = rcu_dereference((head)->first); \
368 pos && ({ prefetch(pos->next); 1; }) && \
369 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
370 pos = rcu_dereference(pos->next))
5 371
6#endif /* _LINUX_RCULIST_H */ 372#endif /* __KERNEL__ */
373#endif
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d42dbec06083..e8b4039cfb2f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -40,6 +40,7 @@
40#include <linux/cpumask.h> 40#include <linux/cpumask.h>
41#include <linux/seqlock.h> 41#include <linux/seqlock.h>
42#include <linux/lockdep.h> 42#include <linux/lockdep.h>
43#include <linux/completion.h>
43 44
44/** 45/**
45 * struct rcu_head - callback structure for use with RCU 46 * struct rcu_head - callback structure for use with RCU
@@ -168,6 +169,27 @@ struct rcu_head {
168 (p) = (v); \ 169 (p) = (v); \
169 }) 170 })
170 171
172/* Infrastructure to implement the synchronize_() primitives. */
173
174struct rcu_synchronize {
175 struct rcu_head head;
176 struct completion completion;
177};
178
179extern void wakeme_after_rcu(struct rcu_head *head);
180
181#define synchronize_rcu_xxx(name, func) \
182void name(void) \
183{ \
184 struct rcu_synchronize rcu; \
185 \
186 init_completion(&rcu.completion); \
187 /* Will wake me after RCU finished. */ \
188 func(&rcu.head, wakeme_after_rcu); \
189 /* Wait for it. */ \
190 wait_for_completion(&rcu.completion); \
191}
192
171/** 193/**
172 * synchronize_sched - block until all CPUs have exited any non-preemptive 194 * synchronize_sched - block until all CPUs have exited any non-preemptive
173 * kernel code sequences. 195 * kernel code sequences.
@@ -224,8 +246,8 @@ extern void call_rcu_bh(struct rcu_head *head,
224/* Exported common interfaces */ 246/* Exported common interfaces */
225extern void synchronize_rcu(void); 247extern void synchronize_rcu(void);
226extern void rcu_barrier(void); 248extern void rcu_barrier(void);
227extern long rcu_batches_completed(void); 249extern void rcu_barrier_bh(void);
228extern long rcu_batches_completed_bh(void); 250extern void rcu_barrier_sched(void);
229 251
230/* Internal to kernel */ 252/* Internal to kernel */
231extern void rcu_init(void); 253extern void rcu_init(void);
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index 8a05c7e20bc4..f04b64eca636 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -40,10 +40,39 @@
40#include <linux/cpumask.h> 40#include <linux/cpumask.h>
41#include <linux/seqlock.h> 41#include <linux/seqlock.h>
42 42
43#define rcu_qsctr_inc(cpu) 43struct rcu_dyntick_sched {
44 int dynticks;
45 int dynticks_snap;
46 int sched_qs;
47 int sched_qs_snap;
48 int sched_dynticks_snap;
49};
50
51DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
52
53static inline void rcu_qsctr_inc(int cpu)
54{
55 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
56
57 rdssp->sched_qs++;
58}
44#define rcu_bh_qsctr_inc(cpu) 59#define rcu_bh_qsctr_inc(cpu)
45#define call_rcu_bh(head, rcu) call_rcu(head, rcu) 60#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
46 61
62/**
63 * call_rcu_sched - Queue RCU callback for invocation after sched grace period.
64 * @head: structure to be used for queueing the RCU updates.
65 * @func: actual update function to be invoked after the grace period
66 *
67 * The update function will be invoked some time after a full
68 * synchronize_sched()-style grace period elapses, in other words after
69 * all currently executing preempt-disabled sections of code (including
70 * hardirq handlers, NMI handlers, and local_irq_save() blocks) have
71 * completed.
72 */
73extern void call_rcu_sched(struct rcu_head *head,
74 void (*func)(struct rcu_head *head));
75
47extern void __rcu_read_lock(void) __acquires(RCU); 76extern void __rcu_read_lock(void) __acquires(RCU);
48extern void __rcu_read_unlock(void) __releases(RCU); 77extern void __rcu_read_unlock(void) __releases(RCU);
49extern int rcu_pending(int cpu); 78extern int rcu_pending(int cpu);
@@ -55,6 +84,7 @@ extern int rcu_needs_cpu(int cpu);
55extern void __synchronize_sched(void); 84extern void __synchronize_sched(void);
56 85
57extern void __rcu_init(void); 86extern void __rcu_init(void);
87extern void rcu_init_sched(void);
58extern void rcu_check_callbacks(int cpu, int user); 88extern void rcu_check_callbacks(int cpu, int user);
59extern void rcu_restart_cpu(int cpu); 89extern void rcu_restart_cpu(int cpu);
60extern long rcu_batches_completed(void); 90extern long rcu_batches_completed(void);
@@ -81,20 +111,20 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
81struct softirq_action; 111struct softirq_action;
82 112
83#ifdef CONFIG_NO_HZ 113#ifdef CONFIG_NO_HZ
84DECLARE_PER_CPU(long, dynticks_progress_counter); 114DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
85 115
86static inline void rcu_enter_nohz(void) 116static inline void rcu_enter_nohz(void)
87{ 117{
88 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 118 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
89 __get_cpu_var(dynticks_progress_counter)++; 119 __get_cpu_var(rcu_dyntick_sched).dynticks++;
90 WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); 120 WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
91} 121}
92 122
93static inline void rcu_exit_nohz(void) 123static inline void rcu_exit_nohz(void)
94{ 124{
95 __get_cpu_var(dynticks_progress_counter)++;
96 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 125 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
97 WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); 126 __get_cpu_var(rcu_dyntick_sched).dynticks++;
127 WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
98} 128}
99 129
100#else /* CONFIG_NO_HZ */ 130#else /* CONFIG_NO_HZ */
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index f6a9fe0ef09c..00b78763a1bf 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -134,6 +134,7 @@ struct scsi_device {
134 unsigned no_start_on_add:1; /* do not issue start on add */ 134 unsigned no_start_on_add:1; /* do not issue start on add */
135 unsigned allow_restart:1; /* issue START_UNIT in error handler */ 135 unsigned allow_restart:1; /* issue START_UNIT in error handler */
136 unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */ 136 unsigned manage_start_stop:1; /* Let HLD (sd) manage start/stop */
137 unsigned start_stop_pwr_cond:1; /* Set power cond. in START_STOP_UNIT */
137 unsigned no_uld_attach:1; /* disable connecting to upper level drivers */ 138 unsigned no_uld_attach:1; /* disable connecting to upper level drivers */
138 unsigned select_no_atn:1; 139 unsigned select_no_atn:1;
139 unsigned fix_capacity:1; /* READ_CAPACITY is too high by 1 */ 140 unsigned fix_capacity:1; /* READ_CAPACITY is too high by 1 */
diff --git a/init/main.c b/init/main.c
index 1efcccff1bdb..edeace036fd9 100644
--- a/init/main.c
+++ b/init/main.c
@@ -759,6 +759,7 @@ static void __init do_initcalls(void)
759 */ 759 */
760static void __init do_basic_setup(void) 760static void __init do_basic_setup(void)
761{ 761{
762 rcu_init_sched(); /* needed by module_init stage. */
762 /* drivers will send hotplug events */ 763 /* drivers will send hotplug events */
763 init_workqueues(); 764 init_workqueues();
764 usermodehelper_init(); 765 usermodehelper_init();
diff --git a/kernel/pid.c b/kernel/pid.c
index 20d59fa2d493..30bd5d4b2ac7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/rculist.h>
33#include <linux/bootmem.h> 34#include <linux/bootmem.h>
34#include <linux/hash.h> 35#include <linux/hash.h>
35#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index 65c0906080ef..16eeeaa9d618 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -387,6 +387,10 @@ static void __rcu_offline_cpu(struct rcu_data *this_rdp,
387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail); 387 rcu_move_batch(this_rdp, rdp->donelist, rdp->donetail);
388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail); 388 rcu_move_batch(this_rdp, rdp->curlist, rdp->curtail);
389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail); 389 rcu_move_batch(this_rdp, rdp->nxtlist, rdp->nxttail);
390
391 local_irq_disable();
392 this_rdp->qlen += rdp->qlen;
393 local_irq_enable();
390} 394}
391 395
392static void rcu_offline_cpu(int cpu) 396static void rcu_offline_cpu(int cpu)
@@ -516,10 +520,38 @@ void rcu_check_callbacks(int cpu, int user)
516 if (user || 520 if (user ||
517 (idle_cpu(cpu) && !in_softirq() && 521 (idle_cpu(cpu) && !in_softirq() &&
518 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 522 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
523
524 /*
525 * Get here if this CPU took its interrupt from user
526 * mode or from the idle loop, and if this is not a
527 * nested interrupt. In this case, the CPU is in
528 * a quiescent state, so count it.
529 *
530 * Also do a memory barrier. This is needed to handle
531 * the case where writes from a preempt-disable section
532 * of code get reordered into schedule() by this CPU's
533 * write buffer. The memory barrier makes sure that
534 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are see
535 * by other CPUs to happen after any such write.
536 */
537
538 smp_mb(); /* See above block comment. */
519 rcu_qsctr_inc(cpu); 539 rcu_qsctr_inc(cpu);
520 rcu_bh_qsctr_inc(cpu); 540 rcu_bh_qsctr_inc(cpu);
521 } else if (!in_softirq()) 541
542 } else if (!in_softirq()) {
543
544 /*
545 * Get here if this CPU did not take its interrupt from
546 * softirq, in other words, if it is not interrupting
547 * a rcu_bh read-side critical section. This is an _bh
548 * critical section, so count it. The memory barrier
549 * is needed for the same reason as is the above one.
550 */
551
552 smp_mb(); /* See above block comment. */
522 rcu_bh_qsctr_inc(cpu); 553 rcu_bh_qsctr_inc(cpu);
554 }
523 raise_rcu_softirq(); 555 raise_rcu_softirq();
524} 556}
525 557
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 6addab5e6d88..f14f372cf6f5 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -39,16 +39,16 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <asm/atomic.h> 40#include <asm/atomic.h>
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/completion.h>
43#include <linux/percpu.h> 42#include <linux/percpu.h>
44#include <linux/notifier.h> 43#include <linux/notifier.h>
45#include <linux/cpu.h> 44#include <linux/cpu.h>
46#include <linux/mutex.h> 45#include <linux/mutex.h>
47#include <linux/module.h> 46#include <linux/module.h>
48 47
49struct rcu_synchronize { 48enum rcu_barrier {
50 struct rcu_head head; 49 RCU_BARRIER_STD,
51 struct completion completion; 50 RCU_BARRIER_BH,
51 RCU_BARRIER_SCHED,
52}; 52};
53 53
54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -60,7 +60,7 @@ static struct completion rcu_barrier_completion;
60 * Awaken the corresponding synchronize_rcu() instance now that a 60 * Awaken the corresponding synchronize_rcu() instance now that a
61 * grace period has elapsed. 61 * grace period has elapsed.
62 */ 62 */
63static void wakeme_after_rcu(struct rcu_head *head) 63void wakeme_after_rcu(struct rcu_head *head)
64{ 64{
65 struct rcu_synchronize *rcu; 65 struct rcu_synchronize *rcu;
66 66
@@ -77,17 +77,7 @@ static void wakeme_after_rcu(struct rcu_head *head)
77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
78 * and may be nested. 78 * and may be nested.
79 */ 79 */
80void synchronize_rcu(void) 80synchronize_rcu_xxx(synchronize_rcu, call_rcu)
81{
82 struct rcu_synchronize rcu;
83
84 init_completion(&rcu.completion);
85 /* Will wake me after RCU finished */
86 call_rcu(&rcu.head, wakeme_after_rcu);
87
88 /* Wait for it */
89 wait_for_completion(&rcu.completion);
90}
91EXPORT_SYMBOL_GPL(synchronize_rcu); 81EXPORT_SYMBOL_GPL(synchronize_rcu);
92 82
93static void rcu_barrier_callback(struct rcu_head *notused) 83static void rcu_barrier_callback(struct rcu_head *notused)
@@ -99,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused)
99/* 89/*
100 * Called with preemption disabled, and from cross-cpu IRQ context. 90 * Called with preemption disabled, and from cross-cpu IRQ context.
101 */ 91 */
102static void rcu_barrier_func(void *notused) 92static void rcu_barrier_func(void *type)
103{ 93{
104 int cpu = smp_processor_id(); 94 int cpu = smp_processor_id();
105 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 95 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
106 96
107 atomic_inc(&rcu_barrier_cpu_count); 97 atomic_inc(&rcu_barrier_cpu_count);
108 call_rcu(head, rcu_barrier_callback); 98 switch ((enum rcu_barrier)type) {
99 case RCU_BARRIER_STD:
100 call_rcu(head, rcu_barrier_callback);
101 break;
102 case RCU_BARRIER_BH:
103 call_rcu_bh(head, rcu_barrier_callback);
104 break;
105 case RCU_BARRIER_SCHED:
106 call_rcu_sched(head, rcu_barrier_callback);
107 break;
108 }
109} 109}
110 110
111/** 111/*
112 * rcu_barrier - Wait until all the in-flight RCUs are complete. 112 * Orchestrate the specified type of RCU barrier, waiting for all
113 * RCU callbacks of the specified type to complete.
113 */ 114 */
114void rcu_barrier(void) 115static void _rcu_barrier(enum rcu_barrier type)
115{ 116{
116 BUG_ON(in_interrupt()); 117 BUG_ON(in_interrupt());
117 /* Take cpucontrol mutex to protect against CPU hotplug */ 118 /* Take cpucontrol mutex to protect against CPU hotplug */
@@ -127,13 +128,39 @@ void rcu_barrier(void)
127 * until all the callbacks are queued. 128 * until all the callbacks are queued.
128 */ 129 */
129 rcu_read_lock(); 130 rcu_read_lock();
130 on_each_cpu(rcu_barrier_func, NULL, 1); 131 on_each_cpu(rcu_barrier_func, (void *)type, 1);
131 rcu_read_unlock(); 132 rcu_read_unlock();
132 wait_for_completion(&rcu_barrier_completion); 133 wait_for_completion(&rcu_barrier_completion);
133 mutex_unlock(&rcu_barrier_mutex); 134 mutex_unlock(&rcu_barrier_mutex);
134} 135}
136
137/**
138 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
139 */
140void rcu_barrier(void)
141{
142 _rcu_barrier(RCU_BARRIER_STD);
143}
135EXPORT_SYMBOL_GPL(rcu_barrier); 144EXPORT_SYMBOL_GPL(rcu_barrier);
136 145
146/**
147 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
148 */
149void rcu_barrier_bh(void)
150{
151 _rcu_barrier(RCU_BARRIER_BH);
152}
153EXPORT_SYMBOL_GPL(rcu_barrier_bh);
154
155/**
156 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
157 */
158void rcu_barrier_sched(void)
159{
160 _rcu_barrier(RCU_BARRIER_SCHED);
161}
162EXPORT_SYMBOL_GPL(rcu_barrier_sched);
163
137void __init rcu_init(void) 164void __init rcu_init(void)
138{ 165{
139 __rcu_init(); 166 __rcu_init();
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 9bf445664457..6f62b77d93c4 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,11 +46,11 @@
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <linux/bitops.h> 47#include <linux/bitops.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kthread.h>
49#include <linux/completion.h> 50#include <linux/completion.h>
50#include <linux/moduleparam.h> 51#include <linux/moduleparam.h>
51#include <linux/percpu.h> 52#include <linux/percpu.h>
52#include <linux/notifier.h> 53#include <linux/notifier.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h> 54#include <linux/cpu.h>
55#include <linux/random.h> 55#include <linux/random.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
82 spinlock_t lock; /* Protect rcu_data fields. */ 82 spinlock_t lock; /* Protect rcu_data fields. */
83 long completed; /* Number of last completed batch. */ 83 long completed; /* Number of last completed batch. */
84 int waitlistcount; 84 int waitlistcount;
85 struct tasklet_struct rcu_tasklet;
86 struct rcu_head *nextlist; 85 struct rcu_head *nextlist;
87 struct rcu_head **nexttail; 86 struct rcu_head **nexttail;
88 struct rcu_head *waitlist[GP_STAGES]; 87 struct rcu_head *waitlist[GP_STAGES];
89 struct rcu_head **waittail[GP_STAGES]; 88 struct rcu_head **waittail[GP_STAGES];
90 struct rcu_head *donelist; 89 struct rcu_head *donelist; /* from waitlist & waitschedlist */
91 struct rcu_head **donetail; 90 struct rcu_head **donetail;
92 long rcu_flipctr[2]; 91 long rcu_flipctr[2];
92 struct rcu_head *nextschedlist;
93 struct rcu_head **nextschedtail;
94 struct rcu_head *waitschedlist;
95 struct rcu_head **waitschedtail;
96 int rcu_sched_sleeping;
93#ifdef CONFIG_RCU_TRACE 97#ifdef CONFIG_RCU_TRACE
94 struct rcupreempt_trace trace; 98 struct rcupreempt_trace trace;
95#endif /* #ifdef CONFIG_RCU_TRACE */ 99#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
131 rcu_try_flip_waitmb_state, 135 rcu_try_flip_waitmb_state,
132}; 136};
133 137
138/*
139 * States for rcu_ctrlblk.rcu_sched_sleep.
140 */
141
142enum rcu_sched_sleep_states {
143 rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
144 rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
145 rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
146};
147
134struct rcu_ctrlblk { 148struct rcu_ctrlblk {
135 spinlock_t fliplock; /* Protect state-machine transitions. */ 149 spinlock_t fliplock; /* Protect state-machine transitions. */
136 long completed; /* Number of last completed batch. */ 150 long completed; /* Number of last completed batch. */
137 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of 151 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
138 the rcu state machine */ 152 the rcu state machine */
153 spinlock_t schedlock; /* Protect rcu_sched sleep state. */
154 enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
155 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
139}; 156};
140 157
141static DEFINE_PER_CPU(struct rcu_data, rcu_data); 158static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
143 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), 160 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
144 .completed = 0, 161 .completed = 0,
145 .rcu_try_flip_state = rcu_try_flip_idle_state, 162 .rcu_try_flip_state = rcu_try_flip_idle_state,
163 .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
164 .sched_sleep = rcu_sched_not_sleeping,
165 .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
146}; 166};
147 167
168static struct task_struct *rcu_sched_grace_period_task;
148 169
149#ifdef CONFIG_RCU_TRACE 170#ifdef CONFIG_RCU_TRACE
150static char *rcu_try_flip_state_names[] = 171static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
207 */ 228 */
208#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); 229#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
209 230
231#define RCU_SCHED_BATCH_TIME (HZ / 50)
232
210/* 233/*
211 * Return the number of RCU batches processed thus far. Useful 234 * Return the number of RCU batches processed thus far. Useful
212 * for debug and statistics. 235 * for debug and statistics.
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
411 } 434 }
412} 435}
413 436
414#ifdef CONFIG_NO_HZ 437DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
438 .dynticks = 1,
439};
415 440
416DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; 441#ifdef CONFIG_NO_HZ
417static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
418static DEFINE_PER_CPU(int, rcu_update_flag); 442static DEFINE_PER_CPU(int, rcu_update_flag);
419 443
420/** 444/**
421 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. 445 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
422 * 446 *
423 * If the CPU was idle with dynamic ticks active, this updates the 447 * If the CPU was idle with dynamic ticks active, this updates the
424 * dynticks_progress_counter to let the RCU handling know that the 448 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
425 * CPU is active. 449 * CPU is active.
426 */ 450 */
427void rcu_irq_enter(void) 451void rcu_irq_enter(void)
428{ 452{
429 int cpu = smp_processor_id(); 453 int cpu = smp_processor_id();
454 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
430 455
431 if (per_cpu(rcu_update_flag, cpu)) 456 if (per_cpu(rcu_update_flag, cpu))
432 per_cpu(rcu_update_flag, cpu)++; 457 per_cpu(rcu_update_flag, cpu)++;
433 458
434 /* 459 /*
435 * Only update if we are coming from a stopped ticks mode 460 * Only update if we are coming from a stopped ticks mode
436 * (dynticks_progress_counter is even). 461 * (rcu_dyntick_sched.dynticks is even).
437 */ 462 */
438 if (!in_interrupt() && 463 if (!in_interrupt() &&
439 (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { 464 (rdssp->dynticks & 0x1) == 0) {
440 /* 465 /*
441 * The following might seem like we could have a race 466 * The following might seem like we could have a race
442 * with NMI/SMIs. But this really isn't a problem. 467 * with NMI/SMIs. But this really isn't a problem.
@@ -459,12 +484,12 @@ void rcu_irq_enter(void)
459 * RCU read-side critical sections on this CPU would 484 * RCU read-side critical sections on this CPU would
460 * have already completed. 485 * have already completed.
461 */ 486 */
462 per_cpu(dynticks_progress_counter, cpu)++; 487 rdssp->dynticks++;
463 /* 488 /*
464 * The following memory barrier ensures that any 489 * The following memory barrier ensures that any
465 * rcu_read_lock() primitives in the irq handler 490 * rcu_read_lock() primitives in the irq handler
466 * are seen by other CPUs to follow the above 491 * are seen by other CPUs to follow the above
467 * increment to dynticks_progress_counter. This is 492 * increment to rcu_dyntick_sched.dynticks. This is
468 * required in order for other CPUs to correctly 493 * required in order for other CPUs to correctly
469 * determine when it is safe to advance the RCU 494 * determine when it is safe to advance the RCU
470 * grace-period state machine. 495 * grace-period state machine.
@@ -472,7 +497,7 @@ void rcu_irq_enter(void)
472 smp_mb(); /* see above block comment. */ 497 smp_mb(); /* see above block comment. */
473 /* 498 /*
474 * Since we can't determine the dynamic tick mode from 499 * Since we can't determine the dynamic tick mode from
475 * the dynticks_progress_counter after this routine, 500 * the rcu_dyntick_sched.dynticks after this routine,
476 * we use a second flag to acknowledge that we came 501 * we use a second flag to acknowledge that we came
477 * from an idle state with ticks stopped. 502 * from an idle state with ticks stopped.
478 */ 503 */
@@ -480,7 +505,7 @@ void rcu_irq_enter(void)
480 /* 505 /*
481 * If we take an NMI/SMI now, they will also increment 506 * If we take an NMI/SMI now, they will also increment
482 * the rcu_update_flag, and will not update the 507 * the rcu_update_flag, and will not update the
483 * dynticks_progress_counter on exit. That is for 508 * rcu_dyntick_sched.dynticks on exit. That is for
484 * this IRQ to do. 509 * this IRQ to do.
485 */ 510 */
486 } 511 }
@@ -490,12 +515,13 @@ void rcu_irq_enter(void)
490 * rcu_irq_exit - Called from exiting Hard irq context. 515 * rcu_irq_exit - Called from exiting Hard irq context.
491 * 516 *
492 * If the CPU was idle with dynamic ticks active, update the 517 * If the CPU was idle with dynamic ticks active, update the
493 * dynticks_progress_counter to put let the RCU handling be 518 * rcu_dyntick_sched.dynticks to put let the RCU handling be
494 * aware that the CPU is going back to idle with no ticks. 519 * aware that the CPU is going back to idle with no ticks.
495 */ 520 */
496void rcu_irq_exit(void) 521void rcu_irq_exit(void)
497{ 522{
498 int cpu = smp_processor_id(); 523 int cpu = smp_processor_id();
524 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
499 525
500 /* 526 /*
501 * rcu_update_flag is set if we interrupted the CPU 527 * rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +529,7 @@ void rcu_irq_exit(void)
503 * Once this occurs, we keep track of interrupt nesting 529 * Once this occurs, we keep track of interrupt nesting
504 * because a NMI/SMI could also come in, and we still 530 * because a NMI/SMI could also come in, and we still
505 * only want the IRQ that started the increment of the 531 * only want the IRQ that started the increment of the
506 * dynticks_progress_counter to be the one that modifies 532 * rcu_dyntick_sched.dynticks to be the one that modifies
507 * it on exit. 533 * it on exit.
508 */ 534 */
509 if (per_cpu(rcu_update_flag, cpu)) { 535 if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +541,29 @@ void rcu_irq_exit(void)
515 541
516 /* 542 /*
517 * If an NMI/SMI happens now we are still 543 * If an NMI/SMI happens now we are still
518 * protected by the dynticks_progress_counter being odd. 544 * protected by the rcu_dyntick_sched.dynticks being odd.
519 */ 545 */
520 546
521 /* 547 /*
522 * The following memory barrier ensures that any 548 * The following memory barrier ensures that any
523 * rcu_read_unlock() primitives in the irq handler 549 * rcu_read_unlock() primitives in the irq handler
524 * are seen by other CPUs to preceed the following 550 * are seen by other CPUs to preceed the following
525 * increment to dynticks_progress_counter. This 551 * increment to rcu_dyntick_sched.dynticks. This
526 * is required in order for other CPUs to determine 552 * is required in order for other CPUs to determine
527 * when it is safe to advance the RCU grace-period 553 * when it is safe to advance the RCU grace-period
528 * state machine. 554 * state machine.
529 */ 555 */
530 smp_mb(); /* see above block comment. */ 556 smp_mb(); /* see above block comment. */
531 per_cpu(dynticks_progress_counter, cpu)++; 557 rdssp->dynticks++;
532 WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); 558 WARN_ON(rdssp->dynticks & 0x1);
533 } 559 }
534} 560}
535 561
536static void dyntick_save_progress_counter(int cpu) 562static void dyntick_save_progress_counter(int cpu)
537{ 563{
538 per_cpu(rcu_dyntick_snapshot, cpu) = 564 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
539 per_cpu(dynticks_progress_counter, cpu); 565
566 rdssp->dynticks_snap = rdssp->dynticks;
540} 567}
541 568
542static inline int 569static inline int
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
544{ 571{
545 long curr; 572 long curr;
546 long snap; 573 long snap;
574 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
547 575
548 curr = per_cpu(dynticks_progress_counter, cpu); 576 curr = rdssp->dynticks;
549 snap = per_cpu(rcu_dyntick_snapshot, cpu); 577 snap = rdssp->dynticks_snap;
550 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 578 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
551 579
552 /* 580 /*
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
567 * that this CPU already acknowledged the counter. 595 * that this CPU already acknowledged the counter.
568 */ 596 */
569 597
570 if ((curr - snap) > 2 || (snap & 0x1) == 0) 598 if ((curr - snap) > 2 || (curr & 0x1) == 0)
571 return 0; 599 return 0;
572 600
573 /* We need this CPU to explicitly acknowledge the counter flip. */ 601 /* We need this CPU to explicitly acknowledge the counter flip. */
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
580{ 608{
581 long curr; 609 long curr;
582 long snap; 610 long snap;
611 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
583 612
584 curr = per_cpu(dynticks_progress_counter, cpu); 613 curr = rdssp->dynticks;
585 snap = per_cpu(rcu_dyntick_snapshot, cpu); 614 snap = rdssp->dynticks_snap;
586 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 615 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
587 616
588 /* 617 /*
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
609 return 1; 638 return 1;
610} 639}
611 640
641static void dyntick_save_progress_counter_sched(int cpu)
642{
643 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
644
645 rdssp->sched_dynticks_snap = rdssp->dynticks;
646}
647
648static int rcu_qsctr_inc_needed_dyntick(int cpu)
649{
650 long curr;
651 long snap;
652 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
653
654 curr = rdssp->dynticks;
655 snap = rdssp->sched_dynticks_snap;
656 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
657
658 /*
659 * If the CPU remained in dynticks mode for the entire time
660 * and didn't take any interrupts, NMIs, SMIs, or whatever,
661 * then it cannot be in the middle of an rcu_read_lock(), so
662 * the next rcu_read_lock() it executes must use the new value
663 * of the counter. Therefore, this CPU has been in a quiescent
664 * state the entire time, and we don't need to wait for it.
665 */
666
667 if ((curr == snap) && ((curr & 0x1) == 0))
668 return 0;
669
670 /*
671 * If the CPU passed through or entered a dynticks idle phase with
672 * no active irq handlers, then, as above, this CPU has already
673 * passed through a quiescent state.
674 */
675
676 if ((curr - snap) > 2 || (snap & 0x1) == 0)
677 return 0;
678
679 /* We need this CPU to go through a quiescent state. */
680
681 return 1;
682}
683
612#else /* !CONFIG_NO_HZ */ 684#else /* !CONFIG_NO_HZ */
613 685
614# define dyntick_save_progress_counter(cpu) do { } while (0) 686# define dyntick_save_progress_counter(cpu) do { } while (0)
615# define rcu_try_flip_waitack_needed(cpu) (1) 687# define rcu_try_flip_waitack_needed(cpu) (1)
616# define rcu_try_flip_waitmb_needed(cpu) (1) 688# define rcu_try_flip_waitmb_needed(cpu) (1)
689
690# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
691# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
617 692
618#endif /* CONFIG_NO_HZ */ 693#endif /* CONFIG_NO_HZ */
619 694
695static void save_qsctr_sched(int cpu)
696{
697 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
698
699 rdssp->sched_qs_snap = rdssp->sched_qs;
700}
701
702static inline int rcu_qsctr_inc_needed(int cpu)
703{
704 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
705
706 /*
707 * If there has been a quiescent state, no more need to wait
708 * on this CPU.
709 */
710
711 if (rdssp->sched_qs != rdssp->sched_qs_snap) {
712 smp_mb(); /* force ordering with cpu entering schedule(). */
713 return 0;
714 }
715
716 /* We need this CPU to go through a quiescent state. */
717
718 return 1;
719}
720
620/* 721/*
621 * Get here when RCU is idle. Decide whether we need to 722 * Get here when RCU is idle. Decide whether we need to
622 * move out of idle state, and return non-zero if so. 723 * move out of idle state, and return non-zero if so.
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
819 unsigned long flags; 920 unsigned long flags;
820 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 921 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
821 922
923 /*
924 * If this CPU took its interrupt from user mode or from the
925 * idle loop, and this is not a nested interrupt, then
926 * this CPU has to have exited all prior preept-disable
927 * sections of code. So increment the counter to note this.
928 *
929 * The memory barrier is needed to handle the case where
930 * writes from a preempt-disable section of code get reordered
931 * into schedule() by this CPU's write buffer. So the memory
932 * barrier makes sure that the rcu_qsctr_inc() is seen by other
933 * CPUs to happen after any such write.
934 */
935
936 if (user ||
937 (idle_cpu(cpu) && !in_softirq() &&
938 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
939 smp_mb(); /* Guard against aggressive schedule(). */
940 rcu_qsctr_inc(cpu);
941 }
942
822 rcu_check_mb(cpu); 943 rcu_check_mb(cpu);
823 if (rcu_ctrlblk.completed == rdp->completed) 944 if (rcu_ctrlblk.completed == rdp->completed)
824 rcu_try_flip(); 945 rcu_try_flip();
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu)
869 struct rcu_head *list = NULL; 990 struct rcu_head *list = NULL;
870 unsigned long flags; 991 unsigned long flags;
871 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 992 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
993 struct rcu_head *schedlist = NULL;
994 struct rcu_head **schedtail = &schedlist;
872 struct rcu_head **tail = &list; 995 struct rcu_head **tail = &list;
873 996
874 /* 997 /*
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
882 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], 1005 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
883 list, tail); 1006 list, tail);
884 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); 1007 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
1008 rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
1009 schedlist, schedtail);
1010 rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
1011 schedlist, schedtail);
1012 rdp->rcu_sched_sleeping = 0;
885 spin_unlock_irqrestore(&rdp->lock, flags); 1013 spin_unlock_irqrestore(&rdp->lock, flags);
886 rdp->waitlistcount = 0; 1014 rdp->waitlistcount = 0;
887 1015
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu)
916 * fix. 1044 * fix.
917 */ 1045 */
918 1046
919 local_irq_save(flags); 1047 local_irq_save(flags); /* disable preempt till we know what lock. */
920 rdp = RCU_DATA_ME(); 1048 rdp = RCU_DATA_ME();
921 spin_lock(&rdp->lock); 1049 spin_lock(&rdp->lock);
922 *rdp->nexttail = list; 1050 *rdp->nexttail = list;
923 if (list) 1051 if (list)
924 rdp->nexttail = tail; 1052 rdp->nexttail = tail;
1053 *rdp->nextschedtail = schedlist;
1054 if (schedlist)
1055 rdp->nextschedtail = schedtail;
925 spin_unlock_irqrestore(&rdp->lock, flags); 1056 spin_unlock_irqrestore(&rdp->lock, flags);
926} 1057}
927 1058
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu)
936void __cpuinit rcu_online_cpu(int cpu) 1067void __cpuinit rcu_online_cpu(int cpu)
937{ 1068{
938 unsigned long flags; 1069 unsigned long flags;
1070 struct rcu_data *rdp;
939 1071
940 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); 1072 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
941 cpu_set(cpu, rcu_cpu_online_map); 1073 cpu_set(cpu, rcu_cpu_online_map);
942 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); 1074 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
1075
1076 /*
1077 * The rcu_sched grace-period processing might have bypassed
1078 * this CPU, given that it was not in the rcu_cpu_online_map
1079 * when the grace-period scan started. This means that the
1080 * grace-period task might sleep. So make sure that if this
1081 * should happen, the first callback posted to this CPU will
1082 * wake up the grace-period task if need be.
1083 */
1084
1085 rdp = RCU_DATA_CPU(cpu);
1086 spin_lock_irqsave(&rdp->lock, flags);
1087 rdp->rcu_sched_sleeping = 1;
1088 spin_unlock_irqrestore(&rdp->lock, flags);
943} 1089}
944 1090
945static void rcu_process_callbacks(struct softirq_action *unused) 1091static void rcu_process_callbacks(struct softirq_action *unused)
@@ -982,31 +1128,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
982 *rdp->nexttail = head; 1128 *rdp->nexttail = head;
983 rdp->nexttail = &head->next; 1129 rdp->nexttail = &head->next;
984 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); 1130 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
985 spin_unlock(&rdp->lock); 1131 spin_unlock_irqrestore(&rdp->lock, flags);
986 local_irq_restore(flags);
987} 1132}
988EXPORT_SYMBOL_GPL(call_rcu); 1133EXPORT_SYMBOL_GPL(call_rcu);
989 1134
1135void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1136{
1137 unsigned long flags;
1138 struct rcu_data *rdp;
1139 int wake_gp = 0;
1140
1141 head->func = func;
1142 head->next = NULL;
1143 local_irq_save(flags);
1144 rdp = RCU_DATA_ME();
1145 spin_lock(&rdp->lock);
1146 *rdp->nextschedtail = head;
1147 rdp->nextschedtail = &head->next;
1148 if (rdp->rcu_sched_sleeping) {
1149
1150 /* Grace-period processing might be sleeping... */
1151
1152 rdp->rcu_sched_sleeping = 0;
1153 wake_gp = 1;
1154 }
1155 spin_unlock_irqrestore(&rdp->lock, flags);
1156 if (wake_gp) {
1157
1158 /* Wake up grace-period processing, unless someone beat us. */
1159
1160 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1161 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
1162 wake_gp = 0;
1163 rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
1164 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1165 if (wake_gp)
1166 wake_up_interruptible(&rcu_ctrlblk.sched_wq);
1167 }
1168}
1169EXPORT_SYMBOL_GPL(call_rcu_sched);
1170
990/* 1171/*
991 * Wait until all currently running preempt_disable() code segments 1172 * Wait until all currently running preempt_disable() code segments
992 * (including hardware-irq-disable segments) complete. Note that 1173 * (including hardware-irq-disable segments) complete. Note that
993 * in -rt this does -not- necessarily result in all currently executing 1174 * in -rt this does -not- necessarily result in all currently executing
994 * interrupt -handlers- having completed. 1175 * interrupt -handlers- having completed.
995 */ 1176 */
996void __synchronize_sched(void) 1177synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
1178EXPORT_SYMBOL_GPL(__synchronize_sched);
1179
1180/*
1181 * kthread function that manages call_rcu_sched grace periods.
1182 */
1183static int rcu_sched_grace_period(void *arg)
997{ 1184{
998 cpumask_t oldmask; 1185 int couldsleep; /* might sleep after current pass. */
1186 int couldsleepnext = 0; /* might sleep after next pass. */
999 int cpu; 1187 int cpu;
1188 unsigned long flags;
1189 struct rcu_data *rdp;
1190 int ret;
1000 1191
1001 if (sched_getaffinity(0, &oldmask) < 0) 1192 /*
1002 oldmask = cpu_possible_map; 1193 * Each pass through the following loop handles one
1003 for_each_online_cpu(cpu) { 1194 * rcu_sched grace period cycle.
1004 sched_setaffinity(0, &cpumask_of_cpu(cpu)); 1195 */
1005 schedule(); 1196 do {
1006 } 1197 /* Save each CPU's current state. */
1007 sched_setaffinity(0, &oldmask); 1198
1199 for_each_online_cpu(cpu) {
1200 dyntick_save_progress_counter_sched(cpu);
1201 save_qsctr_sched(cpu);
1202 }
1203
1204 /*
1205 * Sleep for about an RCU grace-period's worth to
1206 * allow better batching and to consume less CPU.
1207 */
1208 schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
1209
1210 /*
1211 * If there was nothing to do last time, prepare to
1212 * sleep at the end of the current grace period cycle.
1213 */
1214 couldsleep = couldsleepnext;
1215 couldsleepnext = 1;
1216 if (couldsleep) {
1217 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1218 rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
1219 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1220 }
1221
1222 /*
1223 * Wait on each CPU in turn to have either visited
1224 * a quiescent state or been in dynticks-idle mode.
1225 */
1226 for_each_online_cpu(cpu) {
1227 while (rcu_qsctr_inc_needed(cpu) &&
1228 rcu_qsctr_inc_needed_dyntick(cpu)) {
1229 /* resched_cpu(cpu); @@@ */
1230 schedule_timeout_interruptible(1);
1231 }
1232 }
1233
1234 /* Advance callbacks for each CPU. */
1235
1236 for_each_online_cpu(cpu) {
1237
1238 rdp = RCU_DATA_CPU(cpu);
1239 spin_lock_irqsave(&rdp->lock, flags);
1240
1241 /*
1242 * We are running on this CPU irq-disabled, so no
1243 * CPU can go offline until we re-enable irqs.
1244 * The current CPU might have already gone
1245 * offline (between the for_each_offline_cpu and
1246 * the spin_lock_irqsave), but in that case all its
1247 * callback lists will be empty, so no harm done.
1248 *
1249 * Advance the callbacks! We share normal RCU's
1250 * donelist, since callbacks are invoked the
1251 * same way in either case.
1252 */
1253 if (rdp->waitschedlist != NULL) {
1254 *rdp->donetail = rdp->waitschedlist;
1255 rdp->donetail = rdp->waitschedtail;
1256
1257 /*
1258 * Next rcu_check_callbacks() will
1259 * do the required raise_softirq().
1260 */
1261 }
1262 if (rdp->nextschedlist != NULL) {
1263 rdp->waitschedlist = rdp->nextschedlist;
1264 rdp->waitschedtail = rdp->nextschedtail;
1265 couldsleep = 0;
1266 couldsleepnext = 0;
1267 } else {
1268 rdp->waitschedlist = NULL;
1269 rdp->waitschedtail = &rdp->waitschedlist;
1270 }
1271 rdp->nextschedlist = NULL;
1272 rdp->nextschedtail = &rdp->nextschedlist;
1273
1274 /* Mark sleep intention. */
1275
1276 rdp->rcu_sched_sleeping = couldsleep;
1277
1278 spin_unlock_irqrestore(&rdp->lock, flags);
1279 }
1280
1281 /* If we saw callbacks on the last scan, go deal with them. */
1282
1283 if (!couldsleep)
1284 continue;
1285
1286 /* Attempt to block... */
1287
1288 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1289 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
1290
1291 /*
1292 * Someone posted a callback after we scanned.
1293 * Go take care of it.
1294 */
1295 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1296 couldsleepnext = 0;
1297 continue;
1298 }
1299
1300 /* Block until the next person posts a callback. */
1301
1302 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
1303 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1304 ret = 0;
1305 __wait_event_interruptible(rcu_ctrlblk.sched_wq,
1306 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
1307 ret);
1308
1309 /*
1310 * Signals would prevent us from sleeping, and we cannot
1311 * do much with them in any case. So flush them.
1312 */
1313 if (ret)
1314 flush_signals(current);
1315 couldsleepnext = 0;
1316
1317 } while (!kthread_should_stop());
1318
1319 return (0);
1008} 1320}
1009EXPORT_SYMBOL_GPL(__synchronize_sched);
1010 1321
1011/* 1322/*
1012 * Check to see if any future RCU-related work will need to be done 1323 * Check to see if any future RCU-related work will need to be done
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu)
1023 1334
1024 return (rdp->donelist != NULL || 1335 return (rdp->donelist != NULL ||
1025 !!rdp->waitlistcount || 1336 !!rdp->waitlistcount ||
1026 rdp->nextlist != NULL); 1337 rdp->nextlist != NULL ||
1338 rdp->nextschedlist != NULL ||
1339 rdp->waitschedlist != NULL);
1027} 1340}
1028 1341
1029int rcu_pending(int cpu) 1342int rcu_pending(int cpu)
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu)
1034 1347
1035 if (rdp->donelist != NULL || 1348 if (rdp->donelist != NULL ||
1036 !!rdp->waitlistcount || 1349 !!rdp->waitlistcount ||
1037 rdp->nextlist != NULL) 1350 rdp->nextlist != NULL ||
1351 rdp->nextschedlist != NULL ||
1352 rdp->waitschedlist != NULL)
1038 return 1; 1353 return 1;
1039 1354
1040 /* The RCU core needs an acknowledgement from this CPU. */ 1355 /* The RCU core needs an acknowledgement from this CPU. */
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void)
1101 rdp->donetail = &rdp->donelist; 1416 rdp->donetail = &rdp->donelist;
1102 rdp->rcu_flipctr[0] = 0; 1417 rdp->rcu_flipctr[0] = 0;
1103 rdp->rcu_flipctr[1] = 0; 1418 rdp->rcu_flipctr[1] = 0;
1419 rdp->nextschedlist = NULL;
1420 rdp->nextschedtail = &rdp->nextschedlist;
1421 rdp->waitschedlist = NULL;
1422 rdp->waitschedtail = &rdp->waitschedlist;
1423 rdp->rcu_sched_sleeping = 0;
1104 } 1424 }
1105 register_cpu_notifier(&rcu_nb); 1425 register_cpu_notifier(&rcu_nb);
1106 1426
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void)
1123} 1443}
1124 1444
1125/* 1445/*
1126 * Deprecated, use synchronize_rcu() or synchronize_sched() instead. 1446 * Late-boot-time RCU initialization that must wait until after scheduler
1447 * has been initialized.
1127 */ 1448 */
1128void synchronize_kernel(void) 1449void __init rcu_init_sched(void)
1129{ 1450{
1130 synchronize_rcu(); 1451 rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
1452 NULL,
1453 "rcu_sched_grace_period");
1454 WARN_ON(IS_ERR(rcu_sched_grace_period_task));
1131} 1455}
1132 1456
1133#ifdef CONFIG_RCU_TRACE 1457#ifdef CONFIG_RCU_TRACE
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 49ac4947af24..5edf82c34bbc 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -38,7 +38,6 @@
38#include <linux/moduleparam.h> 38#include <linux/moduleparam.h>
39#include <linux/percpu.h> 39#include <linux/percpu.h>
40#include <linux/notifier.h> 40#include <linux/notifier.h>
41#include <linux/rcupdate.h>
42#include <linux/cpu.h> 41#include <linux/cpu.h>
43#include <linux/mutex.h> 42#include <linux/mutex.h>
44#include <linux/rcupreempt_trace.h> 43#include <linux/rcupreempt_trace.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 33acc424667e..90b5b123f7a1 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -57,7 +57,9 @@ static int stat_interval; /* Interval between stats, in seconds. */
57 /* Defaults to "only at end of test". */ 57 /* Defaults to "only at end of test". */
58static int verbose; /* Print more debug info. */ 58static int verbose; /* Print more debug info. */
59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */ 59static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
60static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/ 60static int shuffle_interval = 3; /* Interval between shuffles (in sec)*/
61static int stutter = 5; /* Start/stop testing interval (in sec) */
62static int irqreader = 1; /* RCU readers from irq (timers). */
61static char *torture_type = "rcu"; /* What RCU implementation to torture. */ 63static char *torture_type = "rcu"; /* What RCU implementation to torture. */
62 64
63module_param(nreaders, int, 0444); 65module_param(nreaders, int, 0444);
@@ -72,6 +74,10 @@ module_param(test_no_idle_hz, bool, 0444);
72MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs"); 74MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
73module_param(shuffle_interval, int, 0444); 75module_param(shuffle_interval, int, 0444);
74MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles"); 76MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
77module_param(stutter, int, 0444);
78MODULE_PARM_DESC(stutter, "Number of seconds to run/halt test");
79module_param(irqreader, int, 0444);
80MODULE_PARM_DESC(irqreader, "Allow RCU readers from irq handlers");
75module_param(torture_type, charp, 0444); 81module_param(torture_type, charp, 0444);
76MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)"); 82MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
77 83
@@ -91,6 +97,7 @@ static struct task_struct **fakewriter_tasks;
91static struct task_struct **reader_tasks; 97static struct task_struct **reader_tasks;
92static struct task_struct *stats_task; 98static struct task_struct *stats_task;
93static struct task_struct *shuffler_task; 99static struct task_struct *shuffler_task;
100static struct task_struct *stutter_task;
94 101
95#define RCU_TORTURE_PIPE_LEN 10 102#define RCU_TORTURE_PIPE_LEN 10
96 103
@@ -117,8 +124,18 @@ static atomic_t n_rcu_torture_alloc_fail;
117static atomic_t n_rcu_torture_free; 124static atomic_t n_rcu_torture_free;
118static atomic_t n_rcu_torture_mberror; 125static atomic_t n_rcu_torture_mberror;
119static atomic_t n_rcu_torture_error; 126static atomic_t n_rcu_torture_error;
127static long n_rcu_torture_timers = 0;
120static struct list_head rcu_torture_removed; 128static struct list_head rcu_torture_removed;
121 129
130static int stutter_pause_test = 0;
131
132#if defined(MODULE) || defined(CONFIG_RCU_TORTURE_TEST_RUNNABLE)
133#define RCUTORTURE_RUNNABLE_INIT 1
134#else
135#define RCUTORTURE_RUNNABLE_INIT 0
136#endif
137int rcutorture_runnable = RCUTORTURE_RUNNABLE_INIT;
138
122/* 139/*
123 * Allocate an element from the rcu_tortures pool. 140 * Allocate an element from the rcu_tortures pool.
124 */ 141 */
@@ -179,6 +196,16 @@ rcu_random(struct rcu_random_state *rrsp)
179 return swahw32(rrsp->rrs_state); 196 return swahw32(rrsp->rrs_state);
180} 197}
181 198
199static void
200rcu_stutter_wait(void)
201{
202 while (stutter_pause_test || !rcutorture_runnable)
203 if (rcutorture_runnable)
204 schedule_timeout_interruptible(1);
205 else
206 schedule_timeout_interruptible(round_jiffies_relative(HZ));
207}
208
182/* 209/*
183 * Operations vector for selecting different types of tests. 210 * Operations vector for selecting different types of tests.
184 */ 211 */
@@ -192,7 +219,9 @@ struct rcu_torture_ops {
192 int (*completed)(void); 219 int (*completed)(void);
193 void (*deferredfree)(struct rcu_torture *p); 220 void (*deferredfree)(struct rcu_torture *p);
194 void (*sync)(void); 221 void (*sync)(void);
222 void (*cb_barrier)(void);
195 int (*stats)(char *page); 223 int (*stats)(char *page);
224 int irqcapable;
196 char *name; 225 char *name;
197}; 226};
198static struct rcu_torture_ops *cur_ops = NULL; 227static struct rcu_torture_ops *cur_ops = NULL;
@@ -265,7 +294,9 @@ static struct rcu_torture_ops rcu_ops = {
265 .completed = rcu_torture_completed, 294 .completed = rcu_torture_completed,
266 .deferredfree = rcu_torture_deferred_free, 295 .deferredfree = rcu_torture_deferred_free,
267 .sync = synchronize_rcu, 296 .sync = synchronize_rcu,
297 .cb_barrier = rcu_barrier,
268 .stats = NULL, 298 .stats = NULL,
299 .irqcapable = 1,
269 .name = "rcu" 300 .name = "rcu"
270}; 301};
271 302
@@ -304,7 +335,9 @@ static struct rcu_torture_ops rcu_sync_ops = {
304 .completed = rcu_torture_completed, 335 .completed = rcu_torture_completed,
305 .deferredfree = rcu_sync_torture_deferred_free, 336 .deferredfree = rcu_sync_torture_deferred_free,
306 .sync = synchronize_rcu, 337 .sync = synchronize_rcu,
338 .cb_barrier = NULL,
307 .stats = NULL, 339 .stats = NULL,
340 .irqcapable = 1,
308 .name = "rcu_sync" 341 .name = "rcu_sync"
309}; 342};
310 343
@@ -364,7 +397,9 @@ static struct rcu_torture_ops rcu_bh_ops = {
364 .completed = rcu_bh_torture_completed, 397 .completed = rcu_bh_torture_completed,
365 .deferredfree = rcu_bh_torture_deferred_free, 398 .deferredfree = rcu_bh_torture_deferred_free,
366 .sync = rcu_bh_torture_synchronize, 399 .sync = rcu_bh_torture_synchronize,
400 .cb_barrier = rcu_barrier_bh,
367 .stats = NULL, 401 .stats = NULL,
402 .irqcapable = 1,
368 .name = "rcu_bh" 403 .name = "rcu_bh"
369}; 404};
370 405
@@ -377,7 +412,9 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
377 .completed = rcu_bh_torture_completed, 412 .completed = rcu_bh_torture_completed,
378 .deferredfree = rcu_sync_torture_deferred_free, 413 .deferredfree = rcu_sync_torture_deferred_free,
379 .sync = rcu_bh_torture_synchronize, 414 .sync = rcu_bh_torture_synchronize,
415 .cb_barrier = NULL,
380 .stats = NULL, 416 .stats = NULL,
417 .irqcapable = 1,
381 .name = "rcu_bh_sync" 418 .name = "rcu_bh_sync"
382}; 419};
383 420
@@ -458,6 +495,7 @@ static struct rcu_torture_ops srcu_ops = {
458 .completed = srcu_torture_completed, 495 .completed = srcu_torture_completed,
459 .deferredfree = rcu_sync_torture_deferred_free, 496 .deferredfree = rcu_sync_torture_deferred_free,
460 .sync = srcu_torture_synchronize, 497 .sync = srcu_torture_synchronize,
498 .cb_barrier = NULL,
461 .stats = srcu_torture_stats, 499 .stats = srcu_torture_stats,
462 .name = "srcu" 500 .name = "srcu"
463}; 501};
@@ -482,6 +520,11 @@ static int sched_torture_completed(void)
482 return 0; 520 return 0;
483} 521}
484 522
523static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
524{
525 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
526}
527
485static void sched_torture_synchronize(void) 528static void sched_torture_synchronize(void)
486{ 529{
487 synchronize_sched(); 530 synchronize_sched();
@@ -494,12 +537,28 @@ static struct rcu_torture_ops sched_ops = {
494 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 537 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
495 .readunlock = sched_torture_read_unlock, 538 .readunlock = sched_torture_read_unlock,
496 .completed = sched_torture_completed, 539 .completed = sched_torture_completed,
497 .deferredfree = rcu_sync_torture_deferred_free, 540 .deferredfree = rcu_sched_torture_deferred_free,
498 .sync = sched_torture_synchronize, 541 .sync = sched_torture_synchronize,
542 .cb_barrier = rcu_barrier_sched,
499 .stats = NULL, 543 .stats = NULL,
544 .irqcapable = 1,
500 .name = "sched" 545 .name = "sched"
501}; 546};
502 547
548static struct rcu_torture_ops sched_ops_sync = {
549 .init = rcu_sync_torture_init,
550 .cleanup = NULL,
551 .readlock = sched_torture_read_lock,
552 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
553 .readunlock = sched_torture_read_unlock,
554 .completed = sched_torture_completed,
555 .deferredfree = rcu_sync_torture_deferred_free,
556 .sync = sched_torture_synchronize,
557 .cb_barrier = NULL,
558 .stats = NULL,
559 .name = "sched_sync"
560};
561
503/* 562/*
504 * RCU torture writer kthread. Repeatedly substitutes a new structure 563 * RCU torture writer kthread. Repeatedly substitutes a new structure
505 * for that pointed to by rcu_torture_current, freeing the old structure 564 * for that pointed to by rcu_torture_current, freeing the old structure
@@ -537,6 +596,7 @@ rcu_torture_writer(void *arg)
537 } 596 }
538 rcu_torture_current_version++; 597 rcu_torture_current_version++;
539 oldbatch = cur_ops->completed(); 598 oldbatch = cur_ops->completed();
599 rcu_stutter_wait();
540 } while (!kthread_should_stop() && !fullstop); 600 } while (!kthread_should_stop() && !fullstop);
541 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping"); 601 VERBOSE_PRINTK_STRING("rcu_torture_writer task stopping");
542 while (!kthread_should_stop()) 602 while (!kthread_should_stop())
@@ -560,6 +620,7 @@ rcu_torture_fakewriter(void *arg)
560 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10); 620 schedule_timeout_uninterruptible(1 + rcu_random(&rand)%10);
561 udelay(rcu_random(&rand) & 0x3ff); 621 udelay(rcu_random(&rand) & 0x3ff);
562 cur_ops->sync(); 622 cur_ops->sync();
623 rcu_stutter_wait();
563 } while (!kthread_should_stop() && !fullstop); 624 } while (!kthread_should_stop() && !fullstop);
564 625
565 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping"); 626 VERBOSE_PRINTK_STRING("rcu_torture_fakewriter task stopping");
@@ -569,6 +630,52 @@ rcu_torture_fakewriter(void *arg)
569} 630}
570 631
571/* 632/*
633 * RCU torture reader from timer handler. Dereferences rcu_torture_current,
634 * incrementing the corresponding element of the pipeline array. The
635 * counter in the element should never be greater than 1, otherwise, the
636 * RCU implementation is broken.
637 */
638static void rcu_torture_timer(unsigned long unused)
639{
640 int idx;
641 int completed;
642 static DEFINE_RCU_RANDOM(rand);
643 static DEFINE_SPINLOCK(rand_lock);
644 struct rcu_torture *p;
645 int pipe_count;
646
647 idx = cur_ops->readlock();
648 completed = cur_ops->completed();
649 p = rcu_dereference(rcu_torture_current);
650 if (p == NULL) {
651 /* Leave because rcu_torture_writer is not yet underway */
652 cur_ops->readunlock(idx);
653 return;
654 }
655 if (p->rtort_mbtest == 0)
656 atomic_inc(&n_rcu_torture_mberror);
657 spin_lock(&rand_lock);
658 cur_ops->readdelay(&rand);
659 n_rcu_torture_timers++;
660 spin_unlock(&rand_lock);
661 preempt_disable();
662 pipe_count = p->rtort_pipe_count;
663 if (pipe_count > RCU_TORTURE_PIPE_LEN) {
664 /* Should not happen, but... */
665 pipe_count = RCU_TORTURE_PIPE_LEN;
666 }
667 ++__get_cpu_var(rcu_torture_count)[pipe_count];
668 completed = cur_ops->completed() - completed;
669 if (completed > RCU_TORTURE_PIPE_LEN) {
670 /* Should not happen, but... */
671 completed = RCU_TORTURE_PIPE_LEN;
672 }
673 ++__get_cpu_var(rcu_torture_batch)[completed];
674 preempt_enable();
675 cur_ops->readunlock(idx);
676}
677
678/*
572 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current, 679 * RCU torture reader kthread. Repeatedly dereferences rcu_torture_current,
573 * incrementing the corresponding element of the pipeline array. The 680 * incrementing the corresponding element of the pipeline array. The
574 * counter in the element should never be greater than 1, otherwise, the 681 * counter in the element should never be greater than 1, otherwise, the
@@ -582,11 +689,18 @@ rcu_torture_reader(void *arg)
582 DEFINE_RCU_RANDOM(rand); 689 DEFINE_RCU_RANDOM(rand);
583 struct rcu_torture *p; 690 struct rcu_torture *p;
584 int pipe_count; 691 int pipe_count;
692 struct timer_list t;
585 693
586 VERBOSE_PRINTK_STRING("rcu_torture_reader task started"); 694 VERBOSE_PRINTK_STRING("rcu_torture_reader task started");
587 set_user_nice(current, 19); 695 set_user_nice(current, 19);
696 if (irqreader && cur_ops->irqcapable)
697 setup_timer_on_stack(&t, rcu_torture_timer, 0);
588 698
589 do { 699 do {
700 if (irqreader && cur_ops->irqcapable) {
701 if (!timer_pending(&t))
702 mod_timer(&t, 1);
703 }
590 idx = cur_ops->readlock(); 704 idx = cur_ops->readlock();
591 completed = cur_ops->completed(); 705 completed = cur_ops->completed();
592 p = rcu_dereference(rcu_torture_current); 706 p = rcu_dereference(rcu_torture_current);
@@ -615,8 +729,11 @@ rcu_torture_reader(void *arg)
615 preempt_enable(); 729 preempt_enable();
616 cur_ops->readunlock(idx); 730 cur_ops->readunlock(idx);
617 schedule(); 731 schedule();
732 rcu_stutter_wait();
618 } while (!kthread_should_stop() && !fullstop); 733 } while (!kthread_should_stop() && !fullstop);
619 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping"); 734 VERBOSE_PRINTK_STRING("rcu_torture_reader task stopping");
735 if (irqreader && cur_ops->irqcapable)
736 del_timer_sync(&t);
620 while (!kthread_should_stop()) 737 while (!kthread_should_stop())
621 schedule_timeout_uninterruptible(1); 738 schedule_timeout_uninterruptible(1);
622 return 0; 739 return 0;
@@ -647,20 +764,22 @@ rcu_torture_printk(char *page)
647 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG); 764 cnt += sprintf(&page[cnt], "%s%s ", torture_type, TORTURE_FLAG);
648 cnt += sprintf(&page[cnt], 765 cnt += sprintf(&page[cnt],
649 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d " 766 "rtc: %p ver: %ld tfle: %d rta: %d rtaf: %d rtf: %d "
650 "rtmbe: %d", 767 "rtmbe: %d nt: %ld",
651 rcu_torture_current, 768 rcu_torture_current,
652 rcu_torture_current_version, 769 rcu_torture_current_version,
653 list_empty(&rcu_torture_freelist), 770 list_empty(&rcu_torture_freelist),
654 atomic_read(&n_rcu_torture_alloc), 771 atomic_read(&n_rcu_torture_alloc),
655 atomic_read(&n_rcu_torture_alloc_fail), 772 atomic_read(&n_rcu_torture_alloc_fail),
656 atomic_read(&n_rcu_torture_free), 773 atomic_read(&n_rcu_torture_free),
657 atomic_read(&n_rcu_torture_mberror)); 774 atomic_read(&n_rcu_torture_mberror),
775 n_rcu_torture_timers);
658 if (atomic_read(&n_rcu_torture_mberror) != 0) 776 if (atomic_read(&n_rcu_torture_mberror) != 0)
659 cnt += sprintf(&page[cnt], " !!!"); 777 cnt += sprintf(&page[cnt], " !!!");
660 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG); 778 cnt += sprintf(&page[cnt], "\n%s%s ", torture_type, TORTURE_FLAG);
661 if (i > 1) { 779 if (i > 1) {
662 cnt += sprintf(&page[cnt], "!!! "); 780 cnt += sprintf(&page[cnt], "!!! ");
663 atomic_inc(&n_rcu_torture_error); 781 atomic_inc(&n_rcu_torture_error);
782 WARN_ON_ONCE(1);
664 } 783 }
665 cnt += sprintf(&page[cnt], "Reader Pipe: "); 784 cnt += sprintf(&page[cnt], "Reader Pipe: ");
666 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) 785 for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++)
@@ -785,15 +904,34 @@ rcu_torture_shuffle(void *arg)
785 return 0; 904 return 0;
786} 905}
787 906
907/* Cause the rcutorture test to "stutter", starting and stopping all
908 * threads periodically.
909 */
910static int
911rcu_torture_stutter(void *arg)
912{
913 VERBOSE_PRINTK_STRING("rcu_torture_stutter task started");
914 do {
915 schedule_timeout_interruptible(stutter * HZ);
916 stutter_pause_test = 1;
917 if (!kthread_should_stop())
918 schedule_timeout_interruptible(stutter * HZ);
919 stutter_pause_test = 0;
920 } while (!kthread_should_stop());
921 VERBOSE_PRINTK_STRING("rcu_torture_stutter task stopping");
922 return 0;
923}
924
788static inline void 925static inline void
789rcu_torture_print_module_parms(char *tag) 926rcu_torture_print_module_parms(char *tag)
790{ 927{
791 printk(KERN_ALERT "%s" TORTURE_FLAG 928 printk(KERN_ALERT "%s" TORTURE_FLAG
792 "--- %s: nreaders=%d nfakewriters=%d " 929 "--- %s: nreaders=%d nfakewriters=%d "
793 "stat_interval=%d verbose=%d test_no_idle_hz=%d " 930 "stat_interval=%d verbose=%d test_no_idle_hz=%d "
794 "shuffle_interval = %d\n", 931 "shuffle_interval=%d stutter=%d irqreader=%d\n",
795 torture_type, tag, nrealreaders, nfakewriters, 932 torture_type, tag, nrealreaders, nfakewriters,
796 stat_interval, verbose, test_no_idle_hz, shuffle_interval); 933 stat_interval, verbose, test_no_idle_hz, shuffle_interval,
934 stutter, irqreader);
797} 935}
798 936
799static void 937static void
@@ -802,6 +940,11 @@ rcu_torture_cleanup(void)
802 int i; 940 int i;
803 941
804 fullstop = 1; 942 fullstop = 1;
943 if (stutter_task) {
944 VERBOSE_PRINTK_STRING("Stopping rcu_torture_stutter task");
945 kthread_stop(stutter_task);
946 }
947 stutter_task = NULL;
805 if (shuffler_task) { 948 if (shuffler_task) {
806 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task"); 949 VERBOSE_PRINTK_STRING("Stopping rcu_torture_shuffle task");
807 kthread_stop(shuffler_task); 950 kthread_stop(shuffler_task);
@@ -848,7 +991,9 @@ rcu_torture_cleanup(void)
848 stats_task = NULL; 991 stats_task = NULL;
849 992
850 /* Wait for all RCU callbacks to fire. */ 993 /* Wait for all RCU callbacks to fire. */
851 rcu_barrier(); 994
995 if (cur_ops->cb_barrier != NULL)
996 cur_ops->cb_barrier();
852 997
853 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 998 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
854 999
@@ -868,7 +1013,7 @@ rcu_torture_init(void)
868 int firsterr = 0; 1013 int firsterr = 0;
869 static struct rcu_torture_ops *torture_ops[] = 1014 static struct rcu_torture_ops *torture_ops[] =
870 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 1015 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
871 &srcu_ops, &sched_ops, }; 1016 &srcu_ops, &sched_ops, &sched_ops_sync, };
872 1017
873 /* Process args and tell the world that the torturer is on the job. */ 1018 /* Process args and tell the world that the torturer is on the job. */
874 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 1019 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
@@ -988,6 +1133,19 @@ rcu_torture_init(void)
988 goto unwind; 1133 goto unwind;
989 } 1134 }
990 } 1135 }
1136 if (stutter < 0)
1137 stutter = 0;
1138 if (stutter) {
1139 /* Create the stutter thread */
1140 stutter_task = kthread_run(rcu_torture_stutter, NULL,
1141 "rcu_torture_stutter");
1142 if (IS_ERR(stutter_task)) {
1143 firsterr = PTR_ERR(stutter_task);
1144 VERBOSE_PRINTK_ERRSTRING("Failed to create stutter");
1145 stutter_task = NULL;
1146 goto unwind;
1147 }
1148 }
991 return 0; 1149 return 0;
992 1150
993unwind: 1151unwind:
diff --git a/kernel/smp.c b/kernel/smp.c
index 4f582b257eba..ab10793b0707 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -8,6 +8,7 @@
8#include <linux/module.h> 8#include <linux/module.h>
9#include <linux/percpu.h> 9#include <linux/percpu.h>
10#include <linux/rcupdate.h> 10#include <linux/rcupdate.h>
11#include <linux/rculist.h>
11#include <linux/smp.h> 12#include <linux/smp.h>
12 13
13static DEFINE_PER_CPU(struct call_single_queue, call_single_queue); 14static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 0d562d6531eb..6b16e16428d8 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -83,6 +83,9 @@ extern int maps_protect;
83extern int sysctl_stat_interval; 83extern int sysctl_stat_interval;
84extern int latencytop_enabled; 84extern int latencytop_enabled;
85extern int sysctl_nr_open_min, sysctl_nr_open_max; 85extern int sysctl_nr_open_min, sysctl_nr_open_max;
86#ifdef CONFIG_RCU_TORTURE_TEST
87extern int rcutorture_runnable;
88#endif /* #ifdef CONFIG_RCU_TORTURE_TEST */
86 89
87/* Constants used for minimum and maximum */ 90/* Constants used for minimum and maximum */
88#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM) 91#if defined(CONFIG_DETECT_SOFTLOCKUP) || defined(CONFIG_HIGHMEM)
@@ -820,6 +823,16 @@ static struct ctl_table kern_table[] = {
820 .child = key_sysctls, 823 .child = key_sysctls,
821 }, 824 },
822#endif 825#endif
826#ifdef CONFIG_RCU_TORTURE_TEST
827 {
828 .ctl_name = CTL_UNNUMBERED,
829 .procname = "rcutorture_runnable",
830 .data = &rcutorture_runnable,
831 .maxlen = sizeof(int),
832 .mode = 0644,
833 .proc_handler = &proc_dointvec,
834 },
835#endif
823/* 836/*
824 * NOTE: do not add new entries to this table unless you have read 837 * NOTE: do not add new entries to this table unless you have read
825 * Documentation/sysctl/ctl_unnumbered.txt 838 * Documentation/sysctl/ctl_unnumbered.txt
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c459e8547bd8..df27132a56f4 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -530,16 +530,34 @@ config BOOT_PRINTK_DELAY
530config RCU_TORTURE_TEST 530config RCU_TORTURE_TEST
531 tristate "torture tests for RCU" 531 tristate "torture tests for RCU"
532 depends on DEBUG_KERNEL 532 depends on DEBUG_KERNEL
533 depends on m
534 default n 533 default n
535 help 534 help
536 This option provides a kernel module that runs torture tests 535 This option provides a kernel module that runs torture tests
537 on the RCU infrastructure. The kernel module may be built 536 on the RCU infrastructure. The kernel module may be built
538 after the fact on the running kernel to be tested, if desired. 537 after the fact on the running kernel to be tested, if desired.
539 538
539 Say Y here if you want RCU torture tests to be built into
540 the kernel.
540 Say M if you want the RCU torture tests to build as a module. 541 Say M if you want the RCU torture tests to build as a module.
541 Say N if you are unsure. 542 Say N if you are unsure.
542 543
544config RCU_TORTURE_TEST_RUNNABLE
545 bool "torture tests for RCU runnable by default"
546 depends on RCU_TORTURE_TEST = y
547 default n
548 help
549 This option provides a way to build the RCU torture tests
550 directly into the kernel without them starting up at boot
551 time. You can use /proc/sys/kernel/rcutorture_runnable
552 to manually override this setting. This /proc file is
553 available only when the RCU torture tests have been built
554 into the kernel.
555
556 Say Y here if you want the RCU torture tests to start during
557 boot (you probably don't).
558 Say N here if you want the RCU torture tests to start only
559 after being manually enabled via /proc.
560
543config KPROBES_SANITY_TEST 561config KPROBES_SANITY_TEST
544 bool "Kprobes sanity tests" 562 bool "Kprobes sanity tests"
545 depends on DEBUG_KERNEL 563 depends on DEBUG_KERNEL
diff --git a/lib/textsearch.c b/lib/textsearch.c
index be8bda3862f5..a3e500ad51d7 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -97,6 +97,7 @@
97#include <linux/types.h> 97#include <linux/types.h>
98#include <linux/string.h> 98#include <linux/string.h>
99#include <linux/init.h> 99#include <linux/init.h>
100#include <linux/rculist.h>
100#include <linux/rcupdate.h> 101#include <linux/rcupdate.h>
101#include <linux/err.h> 102#include <linux/err.h>
102#include <linux/textsearch.h> 103#include <linux/textsearch.h>
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 31128cb92a23..ea4643931446 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -20,6 +20,7 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/in.h> 21#include <linux/in.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/rculist.h>
23 24
24static LIST_HEAD(snap_list); 25static LIST_HEAD(snap_list);
25static DEFINE_SPINLOCK(snap_lock); 26static DEFINE_SPINLOCK(snap_lock);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ab2225da0ee2..08f14f6c5fd6 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -27,6 +27,7 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/in.h> 28#include <linux/in.h>
29#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/rculist.h>
30#include <net/p8022.h> 31#include <net/p8022.h>
31#include <net/arp.h> 32#include <net/arp.h>
32#include <linux/rtnetlink.h> 33#include <linux/rtnetlink.h>
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 72c5976a5ce3..142060f02054 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -15,6 +15,7 @@
15 15
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/rculist.h>
18#include <linux/spinlock.h> 19#include <linux/spinlock.h>
19#include <linux/times.h> 20#include <linux/times.h>
20#include <linux/netdevice.h> 21#include <linux/netdevice.h>
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index e38034aa56f5..9e96ffcd29a3 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -13,6 +13,7 @@
13 * 2 of the License, or (at your option) any later version. 13 * 2 of the License, or (at your option) any later version.
14 */ 14 */
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/rculist.h>
16 17
17#include "br_private.h" 18#include "br_private.h"
18#include "br_private_stp.h" 19#include "br_private_stp.h"
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 7d1b11703741..8e0b4c8f62a8 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -20,6 +20,7 @@
20#include <linux/err.h> 20#include <linux/err.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/rculist.h>
23 24
24#include <net/netfilter/nf_conntrack.h> 25#include <net/netfilter/nf_conntrack.h>
25#include <net/netfilter/nf_conntrack_l3proto.h> 26#include <net/netfilter/nf_conntrack_l3proto.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0edefcfc5949..077bcd228799 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -18,6 +18,7 @@
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21#include <linux/rculist.h>
21#include <linux/types.h> 22#include <linux/types.h>
22#include <linux/timer.h> 23#include <linux/timer.h>
23#include <linux/skbuff.h> 24#include <linux/skbuff.h>
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 02c2f7c0b255..643c032a3a57 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -30,8 +30,7 @@
30 */ 30 */
31 31
32#include <linux/types.h> 32#include <linux/types.h>
33#include <linux/rcupdate.h> 33#include <linux/rculist.h>
34#include <linux/list.h>
35#include <linux/skbuff.h> 34#include <linux/skbuff.h>
36#include <linux/spinlock.h> 35#include <linux/spinlock.h>
37#include <linux/string.h> 36#include <linux/string.h>