 Documentation/RCU/NMI-RCU.txt                   |   3
 Documentation/RCU/RTFP.txt                      | 108
 Documentation/RCU/checklist.txt                 |  89
 Documentation/RCU/whatisRCU.txt                 |  58
 arch/ia64/sn/kernel/irq.c                       |   1
 crypto/async_tx/async_tx.c                      |   1
 drivers/infiniband/hw/ipath/ipath_verbs.c       |   1
 drivers/infiniband/hw/ipath/ipath_verbs_mcast.c |   3
 drivers/net/macvlan.c                           |   2
 include/linux/dcache.h                          |   1
 include/linux/list.h                            | 367
 include/linux/rcuclassic.h                      |   3
 include/linux/rculist.h                         | 373
 include/linux/rcupdate.h                        |  26
 include/linux/rcupreempt.h                      |  42
 init/main.c                                     |   1
 kernel/pid.c                                    |   1
 kernel/rcuclassic.c                             |  30
 kernel/rcupdate.c                               |  71
 kernel/rcupreempt.c                             | 420
 kernel/rcupreempt_trace.c                       |   1
 kernel/rcutorture.c                             |  34
 lib/Kconfig.debug                               |   3
 lib/textsearch.c                                |   1
 net/802/psnap.c                                 |   1
 net/8021q/vlan.c                                |   1
 net/bridge/br_fdb.c                             |   1
 net/bridge/br_stp.c                             |   1
 net/netfilter/nf_conntrack_helper.c             |   1
 net/netfilter/nf_conntrack_netlink.c            |   1
 net/netlabel/netlabel_domainhash.c              |   3
 31 files changed, 1144 insertions(+), 505 deletions(-)
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt
index c64158ecde43..a6d32e65d222 100644
--- a/Documentation/RCU/NMI-RCU.txt
+++ b/Documentation/RCU/NMI-RCU.txt
@@ -93,6 +93,9 @@ Since NMI handlers disable preemption, synchronize_sched() is guaranteed
 not to return until all ongoing NMI handlers exit. It is therefore safe
 to free up the handler's data as soon as synchronize_sched() returns.
 
+Important note: for this to work, the architecture in question must
+invoke irq_enter() and irq_exit() on NMI entry and exit, respectively.
+
 
 Answer to Quick Quiz
 
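For illustration only (not part of this patch), here is a minimal sketch of the pattern described above: the handler's data is unpublished, synchronize_sched() waits out any NMI handlers still running, and only then is the data freed. The struct nmi_data, the simplified handler, and the teardown function are hypothetical names.

	struct nmi_data {
		long counter;
	};

	static struct nmi_data *nmi_data;	/* hypothetical shared state */

	/* Simplified handler body invoked from the NMI path. */
	static void my_nmi_handler(void)
	{
		struct nmi_data *p = rcu_dereference(nmi_data);

		if (p)
			p->counter++;
	}

	static void teardown_my_nmi(void)
	{
		struct nmi_data *p = nmi_data;

		rcu_assign_pointer(nmi_data, NULL);	/* unpublish the data */
		synchronize_sched();			/* wait for in-flight NMI handlers */
		kfree(p);				/* now safe to free */
	}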
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt
index 39ad8f56783a..9f711d2df91b 100644
--- a/Documentation/RCU/RTFP.txt
+++ b/Documentation/RCU/RTFP.txt
@@ -52,6 +52,10 @@ of each iteration. Unfortunately, chaotic relaxation requires highly
 structured data, such as the matrices used in scientific programs, and
 is thus inapplicable to most data structures in operating-system kernels.
 
+In 1992, Henry (now Alexia) Massalin completed a dissertation advising
+parallel programmers to defer processing when feasible to simplify
+synchronization. RCU makes extremely heavy use of this advice.
+
 In 1993, Jacobson [Jacobson93] verbally described what is perhaps the
 simplest deferred-free technique: simply waiting a fixed amount of time
 before freeing blocks awaiting deferred free. Jacobson did not describe
@@ -138,6 +142,13 @@ blocking in read-side critical sections appeared [PaulEMcKenney2006c],
 Robert Olsson described an RCU-protected trie-hash combination
 [RobertOlsson2006a].
 
+2007 saw the journal version of the award-winning RCU paper from 2006
+[ThomasEHart2007a], as well as a paper demonstrating use of Promela
+and Spin to mechanically verify an optimization to Oleg Nesterov's
+QRCU [PaulEMcKenney2007QRCUspin], a design document describing
+preemptible RCU [PaulEMcKenney2007PreemptibleRCU], and the three-part
+LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally,
+PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI].
 
 Bibtex Entries
 
@@ -202,6 +213,20 @@ Bibtex Entries
 ,Year="1991"
 }
 
+@phdthesis{HMassalinPhD
+,author="H. Massalin"
+,title="Synthesis: An Efficient Implementation of Fundamental Operating
+System Services"
+,school="Columbia University"
+,address="New York, NY"
+,year="1992"
+,annotation="
+	Mondo optimizing compiler.
+	Wait-free stuff.
+	Good advice: defer work to avoid synchronization.
+"
+}
+
 @unpublished{Jacobson93
 ,author="Van Jacobson"
 ,title="Avoid Read-Side Locking Via Delayed Free"
@@ -635,3 +660,86 @@ Revised:
635" 660"
636} 661}
637 662
663@unpublished{PaulEMcKenney2007PreemptibleRCU
664,Author="Paul E. McKenney"
665,Title="The design of preemptible read-copy-update"
666,month="October"
667,day="8"
668,year="2007"
669,note="Available:
670\url{http://lwn.net/Articles/253651/}
671[Viewed October 25, 2007]"
672,annotation="
673 LWN article describing the design of preemptible RCU.
674"
675}
676
677########################################################################
678#
679# "What is RCU?" LWN series.
680#
681
682@unpublished{PaulEMcKenney2007WhatIsRCUFundamentally
683,Author="Paul E. McKenney and Jonathan Walpole"
684,Title="What is {RCU}, Fundamentally?"
685,month="December"
686,day="17"
687,year="2007"
688,note="Available:
689\url{http://lwn.net/Articles/262464/}
690[Viewed December 27, 2007]"
691,annotation="
692 Lays out the three basic components of RCU: (1) publish-subscribe,
693 (2) wait for pre-existing readers to complete, and (2) maintain
694 multiple versions.
695"
696}
697
698@unpublished{PaulEMcKenney2008WhatIsRCUUsage
699,Author="Paul E. McKenney"
700,Title="What is {RCU}? Part 2: Usage"
701,month="January"
702,day="4"
703,year="2008"
704,note="Available:
705\url{http://lwn.net/Articles/263130/}
706[Viewed January 4, 2008]"
707,annotation="
708 Lays out six uses of RCU:
709 1. RCU is a Reader-Writer Lock Replacement
710 2. RCU is a Restricted Reference-Counting Mechanism
711 3. RCU is a Bulk Reference-Counting Mechanism
712 4. RCU is a Poor Man's Garbage Collector
713 5. RCU is a Way of Providing Existence Guarantees
714 6. RCU is a Way of Waiting for Things to Finish
715"
716}
717
718@unpublished{PaulEMcKenney2008WhatIsRCUAPI
719,Author="Paul E. McKenney"
720,Title="{RCU} part 3: the {RCU} {API}"
721,month="January"
722,day="17"
723,year="2008"
724,note="Available:
725\url{http://lwn.net/Articles/264090/}
726[Viewed January 10, 2008]"
727,annotation="
728 Gives an overview of the Linux-kernel RCU API and a brief annotated RCU
729 bibliography.
730"
731}
732
733@article{DinakarGuniguntala2008IBMSysJ
734,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole"
735,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}"
736,Year="2008"
737,Month="April"
738,journal="IBM Systems Journal"
739,volume="47"
740,number="2"
741,pages="@@-@@"
742,annotation="
743 RCU, realtime RCU, sleepable RCU, performance.
744"
745}
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt
index 42b01bc2e1b4..cf5562cbe356 100644
--- a/Documentation/RCU/checklist.txt
+++ b/Documentation/RCU/checklist.txt
@@ -13,10 +13,13 @@ over a rather long period of time, but improvements are always welcome!
 	detailed performance measurements show that RCU is nonetheless
 	the right tool for the job.
 
-	The other exception would be where performance is not an issue,
-	and RCU provides a simpler implementation. An example of this
-	situation is the dynamic NMI code in the Linux 2.6 kernel,
-	at least on architectures where NMIs are rare.
+	Another exception is where performance is not an issue, and RCU
+	provides a simpler implementation. An example of this situation
+	is the dynamic NMI code in the Linux 2.6 kernel, at least on
+	architectures where NMIs are rare.
+
+	Yet another exception is where the low real-time latency of RCU's
+	read-side primitives is critically important.
 
 1.	Does the update code have proper mutual exclusion?
 
@@ -39,9 +42,10 @@ over a rather long period of time, but improvements are always welcome!
 
 2.	Do the RCU read-side critical sections make proper use of
 	rcu_read_lock() and friends? These primitives are needed
-	to suppress preemption (or bottom halves, in the case of
-	rcu_read_lock_bh()) in the read-side critical sections,
-	and are also an excellent aid to readability.
+	to prevent grace periods from ending prematurely, which
+	could result in data being unceremoniously freed out from
+	under your read-side code, which can greatly increase the
+	actuarial risk of your kernel.
 
 	As a rough rule of thumb, any dereference of an RCU-protected
 	pointer must be covered by rcu_read_lock() or rcu_read_lock_bh()
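As a minimal illustration of the rule of thumb above (the struct foo and the global pointer are hypothetical, and the usual <linux/rcupdate.h> declarations are assumed):

	struct foo {
		int a;
	};

	static struct foo *global_foo;	/* published via rcu_assign_pointer() */

	int read_foo_a(void)
	{
		struct foo *p;
		int ret = -1;

		rcu_read_lock();		/* holds the grace period open */
		p = rcu_dereference(global_foo);
		if (p)
			ret = p->a;		/* p cannot be freed here */
		rcu_read_unlock();		/* a grace period may now end */
		return ret;
	}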
@@ -54,15 +58,30 @@ over a rather long period of time, but improvements are always welcome!
 	be running while updates are in progress. There are a number
 	of ways to handle this concurrency, depending on the situation:
 
-	a.	Make updates appear atomic to readers. For example,
+	a.	Use the RCU variants of the list and hlist update
+		primitives to add, remove, and replace elements on an
+		RCU-protected list. Alternatively, use the RCU-protected
+		trees that have been added to the Linux kernel.
+
+		This is almost always the best approach.
+
+	b.	Proceed as in (a) above, but also maintain per-element
+		locks (that are acquired by both readers and writers)
+		that guard per-element state. Of course, fields that
+		the readers refrain from accessing can be guarded by the
+		update-side lock.
+
+		This works quite well, also.
+
+	c.	Make updates appear atomic to readers. For example,
 		pointer updates to properly aligned fields will appear
 		atomic, as will individual atomic primitives. Operations
 		performed under a lock and sequences of multiple atomic
 		primitives will -not- appear to be atomic.
 
-		This is almost always the best approach.
+		This can work, but is starting to get a bit tricky.
 
-	b.	Carefully order the updates and the reads so that
+	d.	Carefully order the updates and the reads so that
 		readers see valid data at all phases of the update.
 		This is often more difficult than it sounds, especially
 		given modern CPUs' tendency to reorder memory references.
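A sketch of approach (a) above, using the RCU list-update primitives under an update-side spinlock; the list, lock, and struct foo names are illustrative only:

	#include <linux/rculist.h>
	#include <linux/spinlock.h>
	#include <linux/slab.h>

	static LIST_HEAD(my_list);
	static DEFINE_SPINLOCK(my_lock);	/* update-side lock */

	struct foo {
		struct list_head list;
		int key;
	};

	void add_foo(struct foo *p)
	{
		spin_lock(&my_lock);		/* exclude other updaters */
		list_add_rcu(&p->list, &my_list);
		spin_unlock(&my_lock);
	}

	void del_foo(struct foo *p)
	{
		spin_lock(&my_lock);
		list_del_rcu(&p->list);
		spin_unlock(&my_lock);
		synchronize_rcu();		/* wait for pre-existing readers */
		kfree(p);
	}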
@@ -123,18 +142,22 @@ over a rather long period of time, but improvements are always welcome!
 		when publicizing a pointer to a structure that can
 		be traversed by an RCU read-side critical section.
 
-5.	If call_rcu(), or a related primitive such as call_rcu_bh(),
-	is used, the callback function must be written to be called
-	from softirq context. In particular, it cannot block.
+5.	If call_rcu(), or a related primitive such as call_rcu_bh() or
+	call_rcu_sched(), is used, the callback function must be
+	written to be called from softirq context. In particular,
+	it cannot block.
 
 6.	Since synchronize_rcu() can block, it cannot be called from
-	any sort of irq context.
+	any sort of irq context. Ditto for synchronize_sched() and
+	synchronize_srcu().
 
 7.	If the updater uses call_rcu(), then the corresponding readers
 	must use rcu_read_lock() and rcu_read_unlock(). If the updater
 	uses call_rcu_bh(), then the corresponding readers must use
-	rcu_read_lock_bh() and rcu_read_unlock_bh(). Mixing things up
-	will result in confusion and broken kernels.
+	rcu_read_lock_bh() and rcu_read_unlock_bh(). If the updater
+	uses call_rcu_sched(), then the corresponding readers must
+	disable preemption. Mixing things up will result in confusion
+	and broken kernels.
 
 	One exception to this rule: rcu_read_lock() and rcu_read_unlock()
 	may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
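For example, a call_rcu() callback that merely frees the element satisfies the softirq-context restriction. This sketch extends the hypothetical struct foo used above with an rcu_head; the function names are illustrative:

	struct foo {
		struct list_head list;
		struct rcu_head rcu;
		int key;
	};

	static void free_foo_rcu(struct rcu_head *head)
	{
		/* Runs from softirq context: no blocking allowed here. */
		struct foo *p = container_of(head, struct foo, rcu);

		kfree(p);
	}

	void del_foo_async(struct foo *p)	/* caller holds the update-side lock */
	{
		list_del_rcu(&p->list);
		call_rcu(&p->rcu, free_foo_rcu);	/* pairs with rcu_read_lock() readers */
	}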
@@ -143,9 +166,9 @@ over a rather long period of time, but improvements are always welcome!
 	such cases is a must, of course! And the jury is still out on
 	whether the increased speed is worth it.
 
-8.	Although synchronize_rcu() is a bit slower than is call_rcu(),
-	it usually results in simpler code. So, unless update
-	performance is critically important or the updaters cannot block,
+8.	Although synchronize_rcu() is slower than is call_rcu(), it
+	usually results in simpler code. So, unless update performance
+	is critically important or the updaters cannot block,
 	synchronize_rcu() should be used in preference to call_rcu().
 
 	An especially important property of the synchronize_rcu()
@@ -187,23 +210,23 @@ over a rather long period of time, but improvements are always welcome!
 	number of updates per grace period.
 
 9.	All RCU list-traversal primitives, which include
-	list_for_each_rcu(), list_for_each_entry_rcu(),
+	rcu_dereference(), list_for_each_rcu(), list_for_each_entry_rcu(),
 	list_for_each_continue_rcu(), and list_for_each_safe_rcu(),
-	must be within an RCU read-side critical section. RCU
+	must be either within an RCU read-side critical section or
+	must be protected by appropriate update-side locks. RCU
 	read-side critical sections are delimited by rcu_read_lock()
 	and rcu_read_unlock(), or by similar primitives such as
 	rcu_read_lock_bh() and rcu_read_unlock_bh().
 
-	Use of the _rcu() list-traversal primitives outside of an
-	RCU read-side critical section causes no harm other than
-	a slight performance degradation on Alpha CPUs. It can
-	also be quite helpful in reducing code bloat when common
-	code is shared between readers and updaters.
+	The reason that it is permissible to use RCU list-traversal
+	primitives when the update-side lock is held is that doing so
+	can be quite helpful in reducing code bloat when common code is
+	shared between readers and updaters.
 
 10.	Conversely, if you are in an RCU read-side critical section,
-	you -must- use the "_rcu()" variants of the list macros.
-	Failing to do so will break Alpha and confuse people reading
-	your code.
+	and you don't hold the appropriate update-side lock, you -must-
+	use the "_rcu()" variants of the list macros. Failing to do so
+	will break Alpha and confuse people reading your code.
 
 11.	Note that synchronize_rcu() -only- guarantees to wait until
 	all currently executing rcu_read_lock()-protected RCU read-side
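A sketch of the two legal contexts described above, reusing the hypothetical my_list/my_lock/struct foo from the earlier examples:

	/* Reader: traversal must be inside rcu_read_lock()/rcu_read_unlock(). */
	int foo_present(int key)
	{
		struct foo *p;
		int found = 0;

		rcu_read_lock();
		list_for_each_entry_rcu(p, &my_list, list) {
			if (p->key == key) {
				found = 1;
				break;
			}
		}
		rcu_read_unlock();
		return found;
	}

	/* Updater: the same traversal is legal while holding the update-side lock. */
	int foo_count(void)
	{
		struct foo *p;
		int n = 0;

		spin_lock(&my_lock);
		list_for_each_entry_rcu(p, &my_list, list)
			n++;
		spin_unlock(&my_lock);
		return n;
	}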
@@ -230,6 +253,14 @@ over a rather long period of time, but improvements are always welcome!
230 must use whatever locking or other synchronization is required 253 must use whatever locking or other synchronization is required
231 to safely access and/or modify that data structure. 254 to safely access and/or modify that data structure.
232 255
256 RCU callbacks are -usually- executed on the same CPU that executed
257 the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(),
258 but are by -no- means guaranteed to be. For example, if a given
259 CPU goes offline while having an RCU callback pending, then that
260 RCU callback will execute on some surviving CPU. (If this was
261 not the case, a self-spawning RCU callback would prevent the
262 victim CPU from ever going offline.)
263
23314. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu()) 26414. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu())
234 may only be invoked from process context. Unlike other forms of 265 may only be invoked from process context. Unlike other forms of
235 RCU, it -is- permissible to block in an SRCU read-side critical 266 RCU, it -is- permissible to block in an SRCU read-side critical
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt
index e0d6d99b8f9b..e04d643a9f57 100644
--- a/Documentation/RCU/whatisRCU.txt
+++ b/Documentation/RCU/whatisRCU.txt
@@ -1,3 +1,11 @@
+Please note that the "What is RCU?" LWN series is an excellent place
+to start learning about RCU:
+
+1.	What is RCU, Fundamentally?  http://lwn.net/Articles/262464/
+2.	What is RCU? Part 2: Usage   http://lwn.net/Articles/263130/
+3.	RCU part 3: the RCU API      http://lwn.net/Articles/264090/
+
+
 What is RCU?
 
 RCU is a synchronization mechanism that was added to the Linux kernel
@@ -772,26 +780,18 @@ Linux-kernel source code, but it helps to have a full list of the
 APIs, since there does not appear to be a way to categorize them
 in docbook. Here is the list, by category.
 
-Markers for RCU read-side critical sections:
-
-	rcu_read_lock
-	rcu_read_unlock
-	rcu_read_lock_bh
-	rcu_read_unlock_bh
-	srcu_read_lock
-	srcu_read_unlock
-
 RCU pointer/list traversal:
 
 	rcu_dereference
+	list_for_each_entry_rcu
+	hlist_for_each_entry_rcu
+
 	list_for_each_rcu		(to be deprecated in favor of
 					 list_for_each_entry_rcu)
-	list_for_each_entry_rcu
 	list_for_each_continue_rcu	(to be deprecated in favor of new
 					 list_for_each_entry_continue_rcu)
-	hlist_for_each_entry_rcu
 
-RCU pointer update:
+RCU pointer/list update:
 
 	rcu_assign_pointer
 	list_add_rcu
@@ -799,16 +799,36 @@ RCU pointer update:
 	list_del_rcu
 	list_replace_rcu
 	hlist_del_rcu
+	hlist_add_after_rcu
+	hlist_add_before_rcu
 	hlist_add_head_rcu
+	hlist_replace_rcu
+	list_splice_init_rcu()
 
-RCU grace period:
+RCU:	Critical sections	Grace period		Barrier
+
+	rcu_read_lock		synchronize_net		rcu_barrier
+	rcu_read_unlock		synchronize_rcu
+				call_rcu
+
+
+bh:	Critical sections	Grace period		Barrier
+
+	rcu_read_lock_bh	call_rcu_bh		rcu_barrier_bh
+	rcu_read_unlock_bh
+
+
+sched:	Critical sections	Grace period		Barrier
+
+	[preempt_disable]	synchronize_sched	rcu_barrier_sched
+	[and friends]		call_rcu_sched
+
+
+SRCU:	Critical sections	Grace period		Barrier
+
+	srcu_read_lock		synchronize_srcu	N/A
+	srcu_read_unlock
 
-	synchronize_net
-	synchronize_sched
-	synchronize_rcu
-	synchronize_srcu
-	call_rcu
-	call_rcu_bh
 
 See the comment headers in the source code (or the docbook generated
 from them) for more information.
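The new table pairs each RCU flavor's read-side markers with its grace-period and barrier primitives. As an illustration of the "sched" row only (the struct foo, global pointer, and do_something_with() helper are hypothetical names, not from this patch):

	/* Reader side: any preempt-disabled region is a sched read-side section. */
	void reader(void)
	{
		struct foo *p;

		preempt_disable();
		p = rcu_dereference(global_foo);
		if (p)
			do_something_with(p);	/* hypothetical helper */
		preempt_enable();
	}

	/* Updater side: wait for all such regions before freeing the old version. */
	void updater(struct foo *newp)
	{
		struct foo *oldp = global_foo;

		rcu_assign_pointer(global_foo, newp);
		synchronize_sched();		/* or queue via call_rcu_sched() */
		kfree(oldp);
	}

	/* Module unload: flush any pending call_rcu_sched() callbacks. */
	void my_module_exit(void)
	{
		rcu_barrier_sched();
	}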
diff --git a/arch/ia64/sn/kernel/irq.c b/arch/ia64/sn/kernel/irq.c
index 53351c3cd7b1..96c31b4180c3 100644
--- a/arch/ia64/sn/kernel/irq.c
+++ b/arch/ia64/sn/kernel/irq.c
@@ -11,6 +11,7 @@
 #include <linux/irq.h>
 #include <linux/spinlock.h>
 #include <linux/init.h>
+#include <linux/rculist.h>
 #include <asm/sn/addrs.h>
 #include <asm/sn/arch.h>
 #include <asm/sn/intr.h>
diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c
index c6e772fc5ccd..095c798d3170 100644
--- a/crypto/async_tx/async_tx.c
+++ b/crypto/async_tx/async_tx.c
@@ -23,6 +23,7 @@
  *	51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  *
  */
+#include <linux/rculist.h>
 #include <linux/kernel.h>
 #include <linux/async_tx.h>
 
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 7779165b2c2c..38c44dec51ca 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -35,6 +35,7 @@
 #include <rdma/ib_user_verbs.h>
 #include <linux/io.h>
 #include <linux/utsname.h>
+#include <linux/rculist.h>
 
 #include "ipath_kernel.h"
 #include "ipath_verbs.h"
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
index 9e5abf9c309d..d73e32232879 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs_mcast.c
@@ -31,8 +31,7 @@
  * SOFTWARE.
  */
 
-#include <linux/list.h>
-#include <linux/rcupdate.h>
+#include <linux/rculist.h>
 
 #include "ipath_verbs.h"
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index c36a03ae9bfb..860d75d81f82 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -20,7 +20,7 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/string.h>
-#include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 2a6639407c80..1f5cebf10a23 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -3,6 +3,7 @@
 
 #include <asm/atomic.h>
 #include <linux/list.h>
+#include <linux/rculist.h>
 #include <linux/spinlock.h>
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
diff --git a/include/linux/list.h b/include/linux/list.h
index 08cf4f651889..139ec41d9c2e 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -85,65 +85,6 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head)
85} 85}
86 86
87/* 87/*
88 * Insert a new entry between two known consecutive entries.
89 *
90 * This is only for internal list manipulation where we know
91 * the prev/next entries already!
92 */
93static inline void __list_add_rcu(struct list_head * new,
94 struct list_head * prev, struct list_head * next)
95{
96 new->next = next;
97 new->prev = prev;
98 smp_wmb();
99 next->prev = new;
100 prev->next = new;
101}
102
103/**
104 * list_add_rcu - add a new entry to rcu-protected list
105 * @new: new entry to be added
106 * @head: list head to add it after
107 *
108 * Insert a new entry after the specified head.
109 * This is good for implementing stacks.
110 *
111 * The caller must take whatever precautions are necessary
112 * (such as holding appropriate locks) to avoid racing
113 * with another list-mutation primitive, such as list_add_rcu()
114 * or list_del_rcu(), running on this same list.
115 * However, it is perfectly legal to run concurrently with
116 * the _rcu list-traversal primitives, such as
117 * list_for_each_entry_rcu().
118 */
119static inline void list_add_rcu(struct list_head *new, struct list_head *head)
120{
121 __list_add_rcu(new, head, head->next);
122}
123
124/**
125 * list_add_tail_rcu - add a new entry to rcu-protected list
126 * @new: new entry to be added
127 * @head: list head to add it before
128 *
129 * Insert a new entry before the specified head.
130 * This is useful for implementing queues.
131 *
132 * The caller must take whatever precautions are necessary
133 * (such as holding appropriate locks) to avoid racing
134 * with another list-mutation primitive, such as list_add_tail_rcu()
135 * or list_del_rcu(), running on this same list.
136 * However, it is perfectly legal to run concurrently with
137 * the _rcu list-traversal primitives, such as
138 * list_for_each_entry_rcu().
139 */
140static inline void list_add_tail_rcu(struct list_head *new,
141 struct list_head *head)
142{
143 __list_add_rcu(new, head->prev, head);
144}
145
146/*
147 * Delete a list entry by making the prev/next entries 88 * Delete a list entry by making the prev/next entries
148 * point to each other. 89 * point to each other.
149 * 90 *
@@ -174,36 +115,6 @@ extern void list_del(struct list_head *entry);
174#endif 115#endif
175 116
176/** 117/**
177 * list_del_rcu - deletes entry from list without re-initialization
178 * @entry: the element to delete from the list.
179 *
180 * Note: list_empty() on entry does not return true after this,
181 * the entry is in an undefined state. It is useful for RCU based
182 * lockfree traversal.
183 *
184 * In particular, it means that we can not poison the forward
185 * pointers that may still be used for walking the list.
186 *
187 * The caller must take whatever precautions are necessary
188 * (such as holding appropriate locks) to avoid racing
189 * with another list-mutation primitive, such as list_del_rcu()
190 * or list_add_rcu(), running on this same list.
191 * However, it is perfectly legal to run concurrently with
192 * the _rcu list-traversal primitives, such as
193 * list_for_each_entry_rcu().
194 *
195 * Note that the caller is not permitted to immediately free
196 * the newly deleted entry. Instead, either synchronize_rcu()
197 * or call_rcu() must be used to defer freeing until an RCU
198 * grace period has elapsed.
199 */
200static inline void list_del_rcu(struct list_head *entry)
201{
202 __list_del(entry->prev, entry->next);
203 entry->prev = LIST_POISON2;
204}
205
206/**
207 * list_replace - replace old entry by new one 118 * list_replace - replace old entry by new one
208 * @old : the element to be replaced 119 * @old : the element to be replaced
209 * @new : the new element to insert 120 * @new : the new element to insert
@@ -227,25 +138,6 @@ static inline void list_replace_init(struct list_head *old,
227} 138}
228 139
229/** 140/**
230 * list_replace_rcu - replace old entry by new one
231 * @old : the element to be replaced
232 * @new : the new element to insert
233 *
234 * The @old entry will be replaced with the @new entry atomically.
235 * Note: @old should not be empty.
236 */
237static inline void list_replace_rcu(struct list_head *old,
238 struct list_head *new)
239{
240 new->next = old->next;
241 new->prev = old->prev;
242 smp_wmb();
243 new->next->prev = new;
244 new->prev->next = new;
245 old->prev = LIST_POISON2;
246}
247
248/**
249 * list_del_init - deletes entry from list and reinitialize it. 141 * list_del_init - deletes entry from list and reinitialize it.
250 * @entry: the element to delete from the list. 142 * @entry: the element to delete from the list.
251 */ 143 */
@@ -369,62 +261,6 @@ static inline void list_splice_init(struct list_head *list,
369} 261}
370 262
371/** 263/**
372 * list_splice_init_rcu - splice an RCU-protected list into an existing list.
373 * @list: the RCU-protected list to splice
374 * @head: the place in the list to splice the first list into
375 * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
376 *
377 * @head can be RCU-read traversed concurrently with this function.
378 *
379 * Note that this function blocks.
380 *
381 * Important note: the caller must take whatever action is necessary to
382 * prevent any other updates to @head. In principle, it is possible
383 * to modify the list as soon as sync() begins execution.
384 * If this sort of thing becomes necessary, an alternative version
385 * based on call_rcu() could be created. But only if -really-
386 * needed -- there is no shortage of RCU API members.
387 */
388static inline void list_splice_init_rcu(struct list_head *list,
389 struct list_head *head,
390 void (*sync)(void))
391{
392 struct list_head *first = list->next;
393 struct list_head *last = list->prev;
394 struct list_head *at = head->next;
395
396 if (list_empty(head))
397 return;
398
399 /* "first" and "last" tracking list, so initialize it. */
400
401 INIT_LIST_HEAD(list);
402
403 /*
404 * At this point, the list body still points to the source list.
405 * Wait for any readers to finish using the list before splicing
406 * the list body into the new list. Any new readers will see
407 * an empty list.
408 */
409
410 sync();
411
412 /*
413 * Readers are finished with the source list, so perform splice.
414 * The order is important if the new list is global and accessible
415 * to concurrent RCU readers. Note that RCU readers are not
416 * permitted to traverse the prev pointers without excluding
417 * this function.
418 */
419
420 last->next = at;
421 smp_wmb();
422 head->next = first;
423 first->prev = head;
424 at->prev = last;
425}
426
427/**
428 * list_entry - get the struct for this entry 264 * list_entry - get the struct for this entry
429 * @ptr: the &struct list_head pointer. 265 * @ptr: the &struct list_head pointer.
430 * @type: the type of the struct this is embedded in. 266 * @type: the type of the struct this is embedded in.
@@ -629,57 +465,6 @@ static inline void list_splice_init_rcu(struct list_head *list,
629 &pos->member != (head); \ 465 &pos->member != (head); \
630 pos = n, n = list_entry(n->member.prev, typeof(*n), member)) 466 pos = n, n = list_entry(n->member.prev, typeof(*n), member))
631 467
632/**
633 * list_for_each_rcu - iterate over an rcu-protected list
634 * @pos: the &struct list_head to use as a loop cursor.
635 * @head: the head for your list.
636 *
637 * This list-traversal primitive may safely run concurrently with
638 * the _rcu list-mutation primitives such as list_add_rcu()
639 * as long as the traversal is guarded by rcu_read_lock().
640 */
641#define list_for_each_rcu(pos, head) \
642 for (pos = rcu_dereference((head)->next); \
643 prefetch(pos->next), pos != (head); \
644 pos = rcu_dereference(pos->next))
645
646#define __list_for_each_rcu(pos, head) \
647 for (pos = rcu_dereference((head)->next); \
648 pos != (head); \
649 pos = rcu_dereference(pos->next))
650
651/**
652 * list_for_each_entry_rcu - iterate over rcu list of given type
653 * @pos: the type * to use as a loop cursor.
654 * @head: the head for your list.
655 * @member: the name of the list_struct within the struct.
656 *
657 * This list-traversal primitive may safely run concurrently with
658 * the _rcu list-mutation primitives such as list_add_rcu()
659 * as long as the traversal is guarded by rcu_read_lock().
660 */
661#define list_for_each_entry_rcu(pos, head, member) \
662 for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
663 prefetch(pos->member.next), &pos->member != (head); \
664 pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
665
666
667/**
668 * list_for_each_continue_rcu
669 * @pos: the &struct list_head to use as a loop cursor.
670 * @head: the head for your list.
671 *
672 * Iterate over an rcu-protected list, continuing after current point.
673 *
674 * This list-traversal primitive may safely run concurrently with
675 * the _rcu list-mutation primitives such as list_add_rcu()
676 * as long as the traversal is guarded by rcu_read_lock().
677 */
678#define list_for_each_continue_rcu(pos, head) \
679 for ((pos) = rcu_dereference((pos)->next); \
680 prefetch((pos)->next), (pos) != (head); \
681 (pos) = rcu_dereference((pos)->next))
682
683/* 468/*
684 * Double linked lists with a single pointer list head. 469 * Double linked lists with a single pointer list head.
685 * Mostly useful for hash tables where the two pointer list head is 470 * Mostly useful for hash tables where the two pointer list head is
@@ -730,31 +515,6 @@ static inline void hlist_del(struct hlist_node *n)
730 n->pprev = LIST_POISON2; 515 n->pprev = LIST_POISON2;
731} 516}
732 517
733/**
734 * hlist_del_rcu - deletes entry from hash list without re-initialization
735 * @n: the element to delete from the hash list.
736 *
737 * Note: list_unhashed() on entry does not return true after this,
738 * the entry is in an undefined state. It is useful for RCU based
739 * lockfree traversal.
740 *
741 * In particular, it means that we can not poison the forward
742 * pointers that may still be used for walking the hash list.
743 *
744 * The caller must take whatever precautions are necessary
745 * (such as holding appropriate locks) to avoid racing
746 * with another list-mutation primitive, such as hlist_add_head_rcu()
747 * or hlist_del_rcu(), running on this same list.
748 * However, it is perfectly legal to run concurrently with
749 * the _rcu list-traversal primitives, such as
750 * hlist_for_each_entry().
751 */
752static inline void hlist_del_rcu(struct hlist_node *n)
753{
754 __hlist_del(n);
755 n->pprev = LIST_POISON2;
756}
757
758static inline void hlist_del_init(struct hlist_node *n) 518static inline void hlist_del_init(struct hlist_node *n)
759{ 519{
760 if (!hlist_unhashed(n)) { 520 if (!hlist_unhashed(n)) {
@@ -763,27 +523,6 @@ static inline void hlist_del_init(struct hlist_node *n)
763 } 523 }
764} 524}
765 525
766/**
767 * hlist_replace_rcu - replace old entry by new one
768 * @old : the element to be replaced
769 * @new : the new element to insert
770 *
771 * The @old entry will be replaced with the @new entry atomically.
772 */
773static inline void hlist_replace_rcu(struct hlist_node *old,
774 struct hlist_node *new)
775{
776 struct hlist_node *next = old->next;
777
778 new->next = next;
779 new->pprev = old->pprev;
780 smp_wmb();
781 if (next)
782 new->next->pprev = &new->next;
783 *new->pprev = new;
784 old->pprev = LIST_POISON2;
785}
786
787static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) 526static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
788{ 527{
789 struct hlist_node *first = h->first; 528 struct hlist_node *first = h->first;
@@ -794,38 +533,6 @@ static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
794 n->pprev = &h->first; 533 n->pprev = &h->first;
795} 534}
796 535
797
798/**
799 * hlist_add_head_rcu
800 * @n: the element to add to the hash list.
801 * @h: the list to add to.
802 *
803 * Description:
804 * Adds the specified element to the specified hlist,
805 * while permitting racing traversals.
806 *
807 * The caller must take whatever precautions are necessary
808 * (such as holding appropriate locks) to avoid racing
809 * with another list-mutation primitive, such as hlist_add_head_rcu()
810 * or hlist_del_rcu(), running on this same list.
811 * However, it is perfectly legal to run concurrently with
812 * the _rcu list-traversal primitives, such as
813 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
814 * problems on Alpha CPUs. Regardless of the type of CPU, the
815 * list-traversal primitive must be guarded by rcu_read_lock().
816 */
817static inline void hlist_add_head_rcu(struct hlist_node *n,
818 struct hlist_head *h)
819{
820 struct hlist_node *first = h->first;
821 n->next = first;
822 n->pprev = &h->first;
823 smp_wmb();
824 if (first)
825 first->pprev = &n->next;
826 h->first = n;
827}
828
829/* next must be != NULL */ 536/* next must be != NULL */
830static inline void hlist_add_before(struct hlist_node *n, 537static inline void hlist_add_before(struct hlist_node *n,
831 struct hlist_node *next) 538 struct hlist_node *next)
@@ -847,63 +554,6 @@ static inline void hlist_add_after(struct hlist_node *n,
847 next->next->pprev = &next->next; 554 next->next->pprev = &next->next;
848} 555}
849 556
850/**
851 * hlist_add_before_rcu
852 * @n: the new element to add to the hash list.
853 * @next: the existing element to add the new element before.
854 *
855 * Description:
856 * Adds the specified element to the specified hlist
857 * before the specified node while permitting racing traversals.
858 *
859 * The caller must take whatever precautions are necessary
860 * (such as holding appropriate locks) to avoid racing
861 * with another list-mutation primitive, such as hlist_add_head_rcu()
862 * or hlist_del_rcu(), running on this same list.
863 * However, it is perfectly legal to run concurrently with
864 * the _rcu list-traversal primitives, such as
865 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
866 * problems on Alpha CPUs.
867 */
868static inline void hlist_add_before_rcu(struct hlist_node *n,
869 struct hlist_node *next)
870{
871 n->pprev = next->pprev;
872 n->next = next;
873 smp_wmb();
874 next->pprev = &n->next;
875 *(n->pprev) = n;
876}
877
878/**
879 * hlist_add_after_rcu
880 * @prev: the existing element to add the new element after.
881 * @n: the new element to add to the hash list.
882 *
883 * Description:
884 * Adds the specified element to the specified hlist
885 * after the specified node while permitting racing traversals.
886 *
887 * The caller must take whatever precautions are necessary
888 * (such as holding appropriate locks) to avoid racing
889 * with another list-mutation primitive, such as hlist_add_head_rcu()
890 * or hlist_del_rcu(), running on this same list.
891 * However, it is perfectly legal to run concurrently with
892 * the _rcu list-traversal primitives, such as
893 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
894 * problems on Alpha CPUs.
895 */
896static inline void hlist_add_after_rcu(struct hlist_node *prev,
897 struct hlist_node *n)
898{
899 n->next = prev->next;
900 n->pprev = &prev->next;
901 smp_wmb();
902 prev->next = n;
903 if (n->next)
904 n->next->pprev = &n->next;
905}
906
907#define hlist_entry(ptr, type, member) container_of(ptr,type,member) 557#define hlist_entry(ptr, type, member) container_of(ptr,type,member)
908 558
909#define hlist_for_each(pos, head) \ 559#define hlist_for_each(pos, head) \
@@ -964,21 +614,4 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
964 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ 614 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
965 pos = n) 615 pos = n)
966 616
967/**
968 * hlist_for_each_entry_rcu - iterate over rcu list of given type
969 * @tpos: the type * to use as a loop cursor.
970 * @pos: the &struct hlist_node to use as a loop cursor.
971 * @head: the head for your list.
972 * @member: the name of the hlist_node within the struct.
973 *
974 * This list-traversal primitive may safely run concurrently with
975 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
976 * as long as the traversal is guarded by rcu_read_lock().
977 */
978#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
979 for (pos = rcu_dereference((head)->first); \
980 pos && ({ prefetch(pos->next); 1;}) && \
981 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
982 pos = rcu_dereference(pos->next))
983
984#endif 617#endif
diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index b3aa05baab8a..8c774905dcfe 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -151,7 +151,10 @@ extern struct lockdep_map rcu_lock_map;
 
 #define __synchronize_sched() synchronize_rcu()
 
+#define call_rcu_sched(head, func) call_rcu(head, func)
+
 extern void __rcu_init(void);
+#define rcu_init_sched() do { } while (0)
 extern void rcu_check_callbacks(int cpu, int user);
 extern void rcu_restart_cpu(int cpu);
 
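Under classic (non-preemptible) RCU, rcu_read_lock() already implies disabled preemption, so the new call_rcu_sched() and rcu_init_sched() simply map onto the existing machinery above. Callers can therefore use call_rcu_sched() portably across both RCU implementations; a minimal sketch with a hypothetical structure:

	struct bar {
		struct rcu_head rcu;
		int payload;
	};

	static void free_bar(struct rcu_head *head)
	{
		kfree(container_of(head, struct bar, rcu));
	}

	void retire_bar(struct bar *p)
	{
		/*
		 * Waits for all preempt-disabled regions to complete; with
		 * CONFIG_CLASSIC_RCU this expands to plain call_rcu().
		 */
		call_rcu_sched(&p->rcu, free_bar);
	}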
diff --git a/include/linux/rculist.h b/include/linux/rculist.h
new file mode 100644
index 000000000000..b0f39be08b6c
--- /dev/null
+++ b/include/linux/rculist.h
@@ -0,0 +1,373 @@
1#ifndef _LINUX_RCULIST_H
2#define _LINUX_RCULIST_H
3
4#ifdef __KERNEL__
5
6/*
7 * RCU-protected list version
8 */
9#include <linux/list.h>
10#include <linux/rcupdate.h>
11
12/*
13 * Insert a new entry between two known consecutive entries.
14 *
15 * This is only for internal list manipulation where we know
16 * the prev/next entries already!
17 */
18static inline void __list_add_rcu(struct list_head *new,
19 struct list_head *prev, struct list_head *next)
20{
21 new->next = next;
22 new->prev = prev;
23 rcu_assign_pointer(prev->next, new);
24 next->prev = new;
25}
26
27/**
28 * list_add_rcu - add a new entry to rcu-protected list
29 * @new: new entry to be added
30 * @head: list head to add it after
31 *
32 * Insert a new entry after the specified head.
33 * This is good for implementing stacks.
34 *
35 * The caller must take whatever precautions are necessary
36 * (such as holding appropriate locks) to avoid racing
37 * with another list-mutation primitive, such as list_add_rcu()
38 * or list_del_rcu(), running on this same list.
39 * However, it is perfectly legal to run concurrently with
40 * the _rcu list-traversal primitives, such as
41 * list_for_each_entry_rcu().
42 */
43static inline void list_add_rcu(struct list_head *new, struct list_head *head)
44{
45 __list_add_rcu(new, head, head->next);
46}
47
48/**
49 * list_add_tail_rcu - add a new entry to rcu-protected list
50 * @new: new entry to be added
51 * @head: list head to add it before
52 *
53 * Insert a new entry before the specified head.
54 * This is useful for implementing queues.
55 *
56 * The caller must take whatever precautions are necessary
57 * (such as holding appropriate locks) to avoid racing
58 * with another list-mutation primitive, such as list_add_tail_rcu()
59 * or list_del_rcu(), running on this same list.
60 * However, it is perfectly legal to run concurrently with
61 * the _rcu list-traversal primitives, such as
62 * list_for_each_entry_rcu().
63 */
64static inline void list_add_tail_rcu(struct list_head *new,
65 struct list_head *head)
66{
67 __list_add_rcu(new, head->prev, head);
68}
69
70/**
71 * list_del_rcu - deletes entry from list without re-initialization
72 * @entry: the element to delete from the list.
73 *
74 * Note: list_empty() on entry does not return true after this,
75 * the entry is in an undefined state. It is useful for RCU based
76 * lockfree traversal.
77 *
78 * In particular, it means that we can not poison the forward
79 * pointers that may still be used for walking the list.
80 *
81 * The caller must take whatever precautions are necessary
82 * (such as holding appropriate locks) to avoid racing
83 * with another list-mutation primitive, such as list_del_rcu()
84 * or list_add_rcu(), running on this same list.
85 * However, it is perfectly legal to run concurrently with
86 * the _rcu list-traversal primitives, such as
87 * list_for_each_entry_rcu().
88 *
89 * Note that the caller is not permitted to immediately free
90 * the newly deleted entry. Instead, either synchronize_rcu()
91 * or call_rcu() must be used to defer freeing until an RCU
92 * grace period has elapsed.
93 */
94static inline void list_del_rcu(struct list_head *entry)
95{
96 __list_del(entry->prev, entry->next);
97 entry->prev = LIST_POISON2;
98}
99
100/**
101 * list_replace_rcu - replace old entry by new one
102 * @old : the element to be replaced
103 * @new : the new element to insert
104 *
105 * The @old entry will be replaced with the @new entry atomically.
106 * Note: @old should not be empty.
107 */
108static inline void list_replace_rcu(struct list_head *old,
109 struct list_head *new)
110{
111 new->next = old->next;
112 new->prev = old->prev;
113 rcu_assign_pointer(new->prev->next, new);
114 new->next->prev = new;
115 old->prev = LIST_POISON2;
116}
117
118/**
119 * list_splice_init_rcu - splice an RCU-protected list into an existing list.
120 * @list: the RCU-protected list to splice
121 * @head: the place in the list to splice the first list into
122 * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ...
123 *
124 * @head can be RCU-read traversed concurrently with this function.
125 *
126 * Note that this function blocks.
127 *
128 * Important note: the caller must take whatever action is necessary to
129 * prevent any other updates to @head. In principle, it is possible
130 * to modify the list as soon as sync() begins execution.
131 * If this sort of thing becomes necessary, an alternative version
132 * based on call_rcu() could be created. But only if -really-
133 * needed -- there is no shortage of RCU API members.
134 */
135static inline void list_splice_init_rcu(struct list_head *list,
136 struct list_head *head,
137 void (*sync)(void))
138{
139 struct list_head *first = list->next;
140 struct list_head *last = list->prev;
141 struct list_head *at = head->next;
142
143 if (list_empty(head))
144 return;
145
146 /* "first" and "last" tracking list, so initialize it. */
147
148 INIT_LIST_HEAD(list);
149
150 /*
151 * At this point, the list body still points to the source list.
152 * Wait for any readers to finish using the list before splicing
153 * the list body into the new list. Any new readers will see
154 * an empty list.
155 */
156
157 sync();
158
159 /*
160 * Readers are finished with the source list, so perform splice.
161 * The order is important if the new list is global and accessible
162 * to concurrent RCU readers. Note that RCU readers are not
163 * permitted to traverse the prev pointers without excluding
164 * this function.
165 */
166
167 last->next = at;
168 rcu_assign_pointer(head->next, first);
169 first->prev = head;
170 at->prev = last;
171}
172
173/**
174 * list_for_each_rcu - iterate over an rcu-protected list
175 * @pos: the &struct list_head to use as a loop cursor.
176 * @head: the head for your list.
177 *
178 * This list-traversal primitive may safely run concurrently with
179 * the _rcu list-mutation primitives such as list_add_rcu()
180 * as long as the traversal is guarded by rcu_read_lock().
181 */
182#define list_for_each_rcu(pos, head) \
183 for (pos = rcu_dereference((head)->next); \
184 prefetch(pos->next), pos != (head); \
185 pos = rcu_dereference(pos->next))
186
187#define __list_for_each_rcu(pos, head) \
188 for (pos = rcu_dereference((head)->next); \
189 pos != (head); \
190 pos = rcu_dereference(pos->next))
191
192/**
193 * list_for_each_entry_rcu - iterate over rcu list of given type
194 * @pos: the type * to use as a loop cursor.
195 * @head: the head for your list.
196 * @member: the name of the list_struct within the struct.
197 *
198 * This list-traversal primitive may safely run concurrently with
199 * the _rcu list-mutation primitives such as list_add_rcu()
200 * as long as the traversal is guarded by rcu_read_lock().
201 */
202#define list_for_each_entry_rcu(pos, head, member) \
203 for (pos = list_entry(rcu_dereference((head)->next), typeof(*pos), member); \
204 prefetch(pos->member.next), &pos->member != (head); \
205 pos = list_entry(rcu_dereference(pos->member.next), typeof(*pos), member))
206
207
208/**
209 * list_for_each_continue_rcu
210 * @pos: the &struct list_head to use as a loop cursor.
211 * @head: the head for your list.
212 *
213 * Iterate over an rcu-protected list, continuing after current point.
214 *
215 * This list-traversal primitive may safely run concurrently with
216 * the _rcu list-mutation primitives such as list_add_rcu()
217 * as long as the traversal is guarded by rcu_read_lock().
218 */
219#define list_for_each_continue_rcu(pos, head) \
220 for ((pos) = rcu_dereference((pos)->next); \
221 prefetch((pos)->next), (pos) != (head); \
222 (pos) = rcu_dereference((pos)->next))
223
224/**
225 * hlist_del_rcu - deletes entry from hash list without re-initialization
226 * @n: the element to delete from the hash list.
227 *
228 * Note: list_unhashed() on entry does not return true after this,
229 * the entry is in an undefined state. It is useful for RCU based
230 * lockfree traversal.
231 *
232 * In particular, it means that we can not poison the forward
233 * pointers that may still be used for walking the hash list.
234 *
235 * The caller must take whatever precautions are necessary
236 * (such as holding appropriate locks) to avoid racing
237 * with another list-mutation primitive, such as hlist_add_head_rcu()
238 * or hlist_del_rcu(), running on this same list.
239 * However, it is perfectly legal to run concurrently with
240 * the _rcu list-traversal primitives, such as
241 * hlist_for_each_entry().
242 */
243static inline void hlist_del_rcu(struct hlist_node *n)
244{
245 __hlist_del(n);
246 n->pprev = LIST_POISON2;
247}
248
249/**
250 * hlist_replace_rcu - replace old entry by new one
251 * @old : the element to be replaced
252 * @new : the new element to insert
253 *
254 * The @old entry will be replaced with the @new entry atomically.
255 */
256static inline void hlist_replace_rcu(struct hlist_node *old,
257 struct hlist_node *new)
258{
259 struct hlist_node *next = old->next;
260
261 new->next = next;
262 new->pprev = old->pprev;
263 rcu_assign_pointer(*new->pprev, new);
264 if (next)
265 new->next->pprev = &new->next;
266 old->pprev = LIST_POISON2;
267}
268
269/**
270 * hlist_add_head_rcu
271 * @n: the element to add to the hash list.
272 * @h: the list to add to.
273 *
274 * Description:
275 * Adds the specified element to the specified hlist,
276 * while permitting racing traversals.
277 *
278 * The caller must take whatever precautions are necessary
279 * (such as holding appropriate locks) to avoid racing
280 * with another list-mutation primitive, such as hlist_add_head_rcu()
281 * or hlist_del_rcu(), running on this same list.
282 * However, it is perfectly legal to run concurrently with
283 * the _rcu list-traversal primitives, such as
284 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
285 * problems on Alpha CPUs. Regardless of the type of CPU, the
286 * list-traversal primitive must be guarded by rcu_read_lock().
287 */
288static inline void hlist_add_head_rcu(struct hlist_node *n,
289 struct hlist_head *h)
290{
291 struct hlist_node *first = h->first;
292
293 n->next = first;
294 n->pprev = &h->first;
295 rcu_assign_pointer(h->first, n);
296 if (first)
297 first->pprev = &n->next;
298}
299
300/**
301 * hlist_add_before_rcu
302 * @n: the new element to add to the hash list.
303 * @next: the existing element to add the new element before.
304 *
305 * Description:
306 * Adds the specified element to the specified hlist
307 * before the specified node while permitting racing traversals.
308 *
309 * The caller must take whatever precautions are necessary
310 * (such as holding appropriate locks) to avoid racing
311 * with another list-mutation primitive, such as hlist_add_head_rcu()
312 * or hlist_del_rcu(), running on this same list.
313 * However, it is perfectly legal to run concurrently with
314 * the _rcu list-traversal primitives, such as
315 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
316 * problems on Alpha CPUs.
317 */
318static inline void hlist_add_before_rcu(struct hlist_node *n,
319 struct hlist_node *next)
320{
321 n->pprev = next->pprev;
322 n->next = next;
323 rcu_assign_pointer(*(n->pprev), n);
324 next->pprev = &n->next;
325}
326
327/**
328 * hlist_add_after_rcu
329 * @prev: the existing element to add the new element after.
330 * @n: the new element to add to the hash list.
331 *
332 * Description:
333 * Adds the specified element to the specified hlist
334 * after the specified node while permitting racing traversals.
335 *
336 * The caller must take whatever precautions are necessary
337 * (such as holding appropriate locks) to avoid racing
338 * with another list-mutation primitive, such as hlist_add_head_rcu()
339 * or hlist_del_rcu(), running on this same list.
340 * However, it is perfectly legal to run concurrently with
341 * the _rcu list-traversal primitives, such as
342 * hlist_for_each_entry_rcu(), used to prevent memory-consistency
343 * problems on Alpha CPUs.
344 */
345static inline void hlist_add_after_rcu(struct hlist_node *prev,
346 struct hlist_node *n)
347{
348 n->next = prev->next;
349 n->pprev = &prev->next;
350 rcu_assign_pointer(prev->next, n);
351 if (n->next)
352 n->next->pprev = &n->next;
353}
354
355/**
356 * hlist_for_each_entry_rcu - iterate over rcu list of given type
357 * @tpos: the type * to use as a loop cursor.
358 * @pos: the &struct hlist_node to use as a loop cursor.
359 * @head: the head for your list.
360 * @member: the name of the hlist_node within the struct.
361 *
362 * This list-traversal primitive may safely run concurrently with
363 * the _rcu list-mutation primitives such as hlist_add_head_rcu()
364 * as long as the traversal is guarded by rcu_read_lock().
365 */
366#define hlist_for_each_entry_rcu(tpos, pos, head, member) \
367 for (pos = rcu_dereference((head)->first); \
368 pos && ({ prefetch(pos->next); 1; }) && \
369 ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
370 pos = rcu_dereference(pos->next))
371
372#endif /* __KERNEL__ */
373#endif
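Note that the new header changes the publish step of the add/replace primitives from an open-coded smp_wmb() to rcu_assign_pointer(), which pairs naturally with the rcu_dereference() calls in the traversal macros. A hedged usage sketch of the hlist primitives gathered here, using a hypothetical hash table (names are illustrative only):

	#include <linux/rculist.h>
	#include <linux/spinlock.h>
	#include <linux/slab.h>

	#define MY_HASH_SIZE 16

	static struct hlist_head my_hash[MY_HASH_SIZE];
	static DEFINE_SPINLOCK(my_hash_lock);		/* update-side lock */

	struct item {
		struct hlist_node node;
		struct rcu_head rcu;
		int key;
	};

	void item_insert(struct item *p)
	{
		spin_lock(&my_hash_lock);
		hlist_add_head_rcu(&p->node, &my_hash[p->key % MY_HASH_SIZE]);
		spin_unlock(&my_hash_lock);
	}

	/* Caller must hold rcu_read_lock() across lookup and use of the result. */
	struct item *item_lookup(int key)
	{
		struct item *p;
		struct hlist_node *pos;

		hlist_for_each_entry_rcu(p, pos, &my_hash[key % MY_HASH_SIZE], node)
			if (p->key == key)
				return p;
		return NULL;
	}

	static void item_free_rcu(struct rcu_head *head)
	{
		kfree(container_of(head, struct item, rcu));
	}

	void item_remove(struct item *p)
	{
		spin_lock(&my_hash_lock);
		hlist_del_rcu(&p->node);
		spin_unlock(&my_hash_lock);
		call_rcu(&p->rcu, item_free_rcu);	/* defer freeing past readers */
	}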
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d42dbec06083..e8b4039cfb2f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -40,6 +40,7 @@
40#include <linux/cpumask.h> 40#include <linux/cpumask.h>
41#include <linux/seqlock.h> 41#include <linux/seqlock.h>
42#include <linux/lockdep.h> 42#include <linux/lockdep.h>
43#include <linux/completion.h>
43 44
44/** 45/**
45 * struct rcu_head - callback structure for use with RCU 46 * struct rcu_head - callback structure for use with RCU
@@ -168,6 +169,27 @@ struct rcu_head {
168 (p) = (v); \ 169 (p) = (v); \
169 }) 170 })
170 171
172/* Infrastructure to implement the synchronize_() primitives. */
173
174struct rcu_synchronize {
175 struct rcu_head head;
176 struct completion completion;
177};
178
179extern void wakeme_after_rcu(struct rcu_head *head);
180
181#define synchronize_rcu_xxx(name, func) \
182void name(void) \
183{ \
184 struct rcu_synchronize rcu; \
185 \
186 init_completion(&rcu.completion); \
187 /* Will wake me after RCU finished. */ \
188 func(&rcu.head, wakeme_after_rcu); \
189 /* Wait for it. */ \
190 wait_for_completion(&rcu.completion); \
191}
192
171/** 193/**
172 * synchronize_sched - block until all CPUs have exited any non-preemptive 194 * synchronize_sched - block until all CPUs have exited any non-preemptive
173 * kernel code sequences. 195 * kernel code sequences.
@@ -224,8 +246,8 @@ extern void call_rcu_bh(struct rcu_head *head,
224/* Exported common interfaces */ 246/* Exported common interfaces */
225extern void synchronize_rcu(void); 247extern void synchronize_rcu(void);
226extern void rcu_barrier(void); 248extern void rcu_barrier(void);
227extern long rcu_batches_completed(void); 249extern void rcu_barrier_bh(void);
228extern long rcu_batches_completed_bh(void); 250extern void rcu_barrier_sched(void);
229 251
230/* Internal to kernel */ 252/* Internal to kernel */
231extern void rcu_init(void); 253extern void rcu_init(void);
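
To make the new synchronize_rcu_xxx() helper concrete: expanding it for a hypothetical synchronize_rcu_bh() built on call_rcu_bh() would produce roughly the function below (a sketch of the macro expansion, not code added by this patch). Each RCU flavor thus gets a wait-for-grace-period built from its own callback primitive, which is how kernel/rcupdate.c and kernel/rcupreempt.c use the macro later in this series.

	void synchronize_rcu_bh(void)		/* hypothetical name */
	{
		struct rcu_synchronize rcu;

		init_completion(&rcu.completion);
		/* Will wake me after RCU finished. */
		call_rcu_bh(&rcu.head, wakeme_after_rcu);
		/* Wait for it. */
		wait_for_completion(&rcu.completion);
	}
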
diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h
index 8a05c7e20bc4..f04b64eca636 100644
--- a/include/linux/rcupreempt.h
+++ b/include/linux/rcupreempt.h
@@ -40,10 +40,39 @@
40#include <linux/cpumask.h> 40#include <linux/cpumask.h>
41#include <linux/seqlock.h> 41#include <linux/seqlock.h>
42 42
43#define rcu_qsctr_inc(cpu) 43struct rcu_dyntick_sched {
44 int dynticks;
45 int dynticks_snap;
46 int sched_qs;
47 int sched_qs_snap;
48 int sched_dynticks_snap;
49};
50
51DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
52
53static inline void rcu_qsctr_inc(int cpu)
54{
55 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
56
57 rdssp->sched_qs++;
58}
44#define rcu_bh_qsctr_inc(cpu) 59#define rcu_bh_qsctr_inc(cpu)
45#define call_rcu_bh(head, rcu) call_rcu(head, rcu) 60#define call_rcu_bh(head, rcu) call_rcu(head, rcu)
46 61
62/**
63 * call_rcu_sched - Queue RCU callback for invocation after sched grace period.
64 * @head: structure to be used for queueing the RCU updates.
65 * @func: actual update function to be invoked after the grace period
66 *
67 * The update function will be invoked some time after a full
68 * synchronize_sched()-style grace period elapses, in other words after
69 * all currently executing preempt-disabled sections of code (including
70 * hardirq handlers, NMI handlers, and local_irq_save() blocks) have
71 * completed.
72 */
73extern void call_rcu_sched(struct rcu_head *head,
74 void (*func)(struct rcu_head *head));
75
47extern void __rcu_read_lock(void) __acquires(RCU); 76extern void __rcu_read_lock(void) __acquires(RCU);
48extern void __rcu_read_unlock(void) __releases(RCU); 77extern void __rcu_read_unlock(void) __releases(RCU);
49extern int rcu_pending(int cpu); 78extern int rcu_pending(int cpu);
@@ -55,6 +84,7 @@ extern int rcu_needs_cpu(int cpu);
55extern void __synchronize_sched(void); 84extern void __synchronize_sched(void);
56 85
57extern void __rcu_init(void); 86extern void __rcu_init(void);
87extern void rcu_init_sched(void);
58extern void rcu_check_callbacks(int cpu, int user); 88extern void rcu_check_callbacks(int cpu, int user);
59extern void rcu_restart_cpu(int cpu); 89extern void rcu_restart_cpu(int cpu);
60extern long rcu_batches_completed(void); 90extern long rcu_batches_completed(void);
@@ -81,20 +111,20 @@ extern struct rcupreempt_trace *rcupreempt_trace_cpu(int cpu);
81struct softirq_action; 111struct softirq_action;
82 112
83#ifdef CONFIG_NO_HZ 113#ifdef CONFIG_NO_HZ
84DECLARE_PER_CPU(long, dynticks_progress_counter); 114DECLARE_PER_CPU(struct rcu_dyntick_sched, rcu_dyntick_sched);
85 115
86static inline void rcu_enter_nohz(void) 116static inline void rcu_enter_nohz(void)
87{ 117{
88 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ 118 smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */
89 __get_cpu_var(dynticks_progress_counter)++; 119 __get_cpu_var(rcu_dyntick_sched).dynticks++;
90 WARN_ON(__get_cpu_var(dynticks_progress_counter) & 0x1); 120 WARN_ON(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1);
91} 121}
92 122
93static inline void rcu_exit_nohz(void) 123static inline void rcu_exit_nohz(void)
94{ 124{
95 __get_cpu_var(dynticks_progress_counter)++;
96 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ 125 smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */
97 WARN_ON(!(__get_cpu_var(dynticks_progress_counter) & 0x1)); 126 __get_cpu_var(rcu_dyntick_sched).dynticks++;
127 WARN_ON(!(__get_cpu_var(rcu_dyntick_sched).dynticks & 0x1));
98} 128}
99 129
100#else /* CONFIG_NO_HZ */ 130#else /* CONFIG_NO_HZ */
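
A hedged sketch of the intended call_rcu_sched() usage (struct bar and the function names are illustrative; <linux/rcupdate.h> and <linux/slab.h> are assumed): readers that rely on preempt_disable() or local_irq_save() rather than rcu_read_lock() keep the structure alive until a sched grace period has elapsed, after which the callback frees it.

	struct bar {
		int data;
		struct rcu_head rcu;
	};

	static void bar_free_rcu(struct rcu_head *head)
	{
		struct bar *b = container_of(head, struct bar, rcu);

		kfree(b);
	}

	static void bar_retire(struct bar *b)
	{
		/* Defer the kfree() until all currently executing
		 * preempt-disabled sections (including hardirq and
		 * NMI handlers) have completed. */
		call_rcu_sched(&b->rcu, bar_free_rcu);
	}
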
diff --git a/init/main.c b/init/main.c
index f7fb20021d48..a9cc3e0803de 100644
--- a/init/main.c
+++ b/init/main.c
@@ -758,6 +758,7 @@ static void __init do_initcalls(void)
758 */ 758 */
759static void __init do_basic_setup(void) 759static void __init do_basic_setup(void)
760{ 760{
761 rcu_init_sched(); /* needed by module_init stage. */
761 /* drivers will send hotplug events */ 762 /* drivers will send hotplug events */
762 init_workqueues(); 763 init_workqueues();
763 usermodehelper_init(); 764 usermodehelper_init();
diff --git a/kernel/pid.c b/kernel/pid.c
index 20d59fa2d493..30bd5d4b2ac7 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -30,6 +30,7 @@
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/slab.h> 31#include <linux/slab.h>
32#include <linux/init.h> 32#include <linux/init.h>
33#include <linux/rculist.h>
33#include <linux/bootmem.h> 34#include <linux/bootmem.h>
34#include <linux/hash.h> 35#include <linux/hash.h>
35#include <linux/pid_namespace.h> 36#include <linux/pid_namespace.h>
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index f4ffbd0f306f..d8348792f9f5 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -502,10 +502,38 @@ void rcu_check_callbacks(int cpu, int user)
502 if (user || 502 if (user ||
503 (idle_cpu(cpu) && !in_softirq() && 503 (idle_cpu(cpu) && !in_softirq() &&
504 hardirq_count() <= (1 << HARDIRQ_SHIFT))) { 504 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
505
506 /*
507 * Get here if this CPU took its interrupt from user
508 * mode or from the idle loop, and if this is not a
509 * nested interrupt. In this case, the CPU is in
510 * a quiescent state, so count it.
511 *
512 * Also do a memory barrier. This is needed to handle
513 * the case where writes from a preempt-disable section
514 * of code get reordered into schedule() by this CPU's
515 * write buffer. The memory barrier makes sure that
516 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
516 * the rcu_qsctr_inc() and rcu_bh_qsctr_inc() are seen
517 * by other CPUs to happen after any such write.
518 */
519
520 smp_mb(); /* See above block comment. */
505 rcu_qsctr_inc(cpu); 521 rcu_qsctr_inc(cpu);
506 rcu_bh_qsctr_inc(cpu); 522 rcu_bh_qsctr_inc(cpu);
507 } else if (!in_softirq()) 523
524 } else if (!in_softirq()) {
525
526 /*
527 * Get here if this CPU did not take its interrupt from
528 * softirq, in other words, if it is not interrupting
529 * a rcu_bh read-side critical section. This is an _bh
530 * critical section, so count it. The memory barrier
531 * is needed for the same reason as is the above one.
532 */
533
534 smp_mb(); /* See above block comment. */
508 rcu_bh_qsctr_inc(cpu); 535 rcu_bh_qsctr_inc(cpu);
536 }
509 raise_rcu_softirq(); 537 raise_rcu_softirq();
510} 538}
511 539
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c09605f8d16c..4a74b8d48d90 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -39,16 +39,16 @@
39#include <linux/sched.h> 39#include <linux/sched.h>
40#include <asm/atomic.h> 40#include <asm/atomic.h>
41#include <linux/bitops.h> 41#include <linux/bitops.h>
42#include <linux/completion.h>
43#include <linux/percpu.h> 42#include <linux/percpu.h>
44#include <linux/notifier.h> 43#include <linux/notifier.h>
45#include <linux/cpu.h> 44#include <linux/cpu.h>
46#include <linux/mutex.h> 45#include <linux/mutex.h>
47#include <linux/module.h> 46#include <linux/module.h>
48 47
49struct rcu_synchronize { 48enum rcu_barrier {
50 struct rcu_head head; 49 RCU_BARRIER_STD,
51 struct completion completion; 50 RCU_BARRIER_BH,
51 RCU_BARRIER_SCHED,
52}; 52};
53 53
54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; 54static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -60,7 +60,7 @@ static struct completion rcu_barrier_completion;
60 * Awaken the corresponding synchronize_rcu() instance now that a 60 * Awaken the corresponding synchronize_rcu() instance now that a
61 * grace period has elapsed. 61 * grace period has elapsed.
62 */ 62 */
63static void wakeme_after_rcu(struct rcu_head *head) 63void wakeme_after_rcu(struct rcu_head *head)
64{ 64{
65 struct rcu_synchronize *rcu; 65 struct rcu_synchronize *rcu;
66 66
@@ -77,17 +77,7 @@ static void wakeme_after_rcu(struct rcu_head *head)
77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(), 77 * sections are delimited by rcu_read_lock() and rcu_read_unlock(),
78 * and may be nested. 78 * and may be nested.
79 */ 79 */
80void synchronize_rcu(void) 80synchronize_rcu_xxx(synchronize_rcu, call_rcu)
81{
82 struct rcu_synchronize rcu;
83
84 init_completion(&rcu.completion);
85 /* Will wake me after RCU finished */
86 call_rcu(&rcu.head, wakeme_after_rcu);
87
88 /* Wait for it */
89 wait_for_completion(&rcu.completion);
90}
91EXPORT_SYMBOL_GPL(synchronize_rcu); 81EXPORT_SYMBOL_GPL(synchronize_rcu);
92 82
93static void rcu_barrier_callback(struct rcu_head *notused) 83static void rcu_barrier_callback(struct rcu_head *notused)
@@ -99,19 +89,30 @@ static void rcu_barrier_callback(struct rcu_head *notused)
99/* 89/*
100 * Called with preemption disabled, and from cross-cpu IRQ context. 90 * Called with preemption disabled, and from cross-cpu IRQ context.
101 */ 91 */
102static void rcu_barrier_func(void *notused) 92static void rcu_barrier_func(void *type)
103{ 93{
104 int cpu = smp_processor_id(); 94 int cpu = smp_processor_id();
105 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu); 95 struct rcu_head *head = &per_cpu(rcu_barrier_head, cpu);
106 96
107 atomic_inc(&rcu_barrier_cpu_count); 97 atomic_inc(&rcu_barrier_cpu_count);
108 call_rcu(head, rcu_barrier_callback); 98 switch ((enum rcu_barrier)type) {
99 case RCU_BARRIER_STD:
100 call_rcu(head, rcu_barrier_callback);
101 break;
102 case RCU_BARRIER_BH:
103 call_rcu_bh(head, rcu_barrier_callback);
104 break;
105 case RCU_BARRIER_SCHED:
106 call_rcu_sched(head, rcu_barrier_callback);
107 break;
108 }
109} 109}
110 110
111/** 111/*
112 * rcu_barrier - Wait until all the in-flight RCUs are complete. 112 * Orchestrate the specified type of RCU barrier, waiting for all
113 * RCU callbacks of the specified type to complete.
113 */ 114 */
114void rcu_barrier(void) 115static void _rcu_barrier(enum rcu_barrier type)
115{ 116{
116 BUG_ON(in_interrupt()); 117 BUG_ON(in_interrupt());
117 /* Take cpucontrol mutex to protect against CPU hotplug */ 118 /* Take cpucontrol mutex to protect against CPU hotplug */
@@ -127,13 +128,39 @@ void rcu_barrier(void)
127 * until all the callbacks are queued. 128 * until all the callbacks are queued.
128 */ 129 */
129 rcu_read_lock(); 130 rcu_read_lock();
130 on_each_cpu(rcu_barrier_func, NULL, 0, 1); 131 on_each_cpu(rcu_barrier_func, (void *)type, 0, 1);
131 rcu_read_unlock(); 132 rcu_read_unlock();
132 wait_for_completion(&rcu_barrier_completion); 133 wait_for_completion(&rcu_barrier_completion);
133 mutex_unlock(&rcu_barrier_mutex); 134 mutex_unlock(&rcu_barrier_mutex);
134} 135}
136
137/**
138 * rcu_barrier - Wait until all in-flight call_rcu() callbacks complete.
139 */
140void rcu_barrier(void)
141{
142 _rcu_barrier(RCU_BARRIER_STD);
143}
135EXPORT_SYMBOL_GPL(rcu_barrier); 144EXPORT_SYMBOL_GPL(rcu_barrier);
136 145
146/**
147 * rcu_barrier_bh - Wait until all in-flight call_rcu_bh() callbacks complete.
148 */
149void rcu_barrier_bh(void)
150{
151 _rcu_barrier(RCU_BARRIER_BH);
152}
153EXPORT_SYMBOL_GPL(rcu_barrier_bh);
154
155/**
156 * rcu_barrier_sched - Wait for in-flight call_rcu_sched() callbacks.
157 */
158void rcu_barrier_sched(void)
159{
160 _rcu_barrier(RCU_BARRIER_SCHED);
161}
162EXPORT_SYMBOL_GPL(rcu_barrier_sched);
163
137void __init rcu_init(void) 164void __init rcu_init(void)
138{ 165{
139 __rcu_init(); 166 __rcu_init();
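
The three exported barriers matter mostly at module-unload time. A minimal, hypothetical exit path (the module and its unregister helper are made-up names for illustration) first stops posting new callbacks and only then waits for the ones already queued, so no callback can run after the module's text and data are freed.

	static void __exit example_exit(void)
	{
		/* No new call_rcu_sched() invocations after this point... */
		unregister_example_hooks();	/* hypothetical */

		/* ...then wait for every callback already queued on any
		 * CPU before the module goes away. */
		rcu_barrier_sched();
	}
	module_exit(example_exit);
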
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index e1cdf196a515..396b121edfe5 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,11 +46,11 @@
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <linux/bitops.h> 47#include <linux/bitops.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kthread.h>
49#include <linux/completion.h> 50#include <linux/completion.h>
50#include <linux/moduleparam.h> 51#include <linux/moduleparam.h>
51#include <linux/percpu.h> 52#include <linux/percpu.h>
52#include <linux/notifier.h> 53#include <linux/notifier.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h> 54#include <linux/cpu.h>
55#include <linux/random.h> 55#include <linux/random.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
82 spinlock_t lock; /* Protect rcu_data fields. */ 82 spinlock_t lock; /* Protect rcu_data fields. */
83 long completed; /* Number of last completed batch. */ 83 long completed; /* Number of last completed batch. */
84 int waitlistcount; 84 int waitlistcount;
85 struct tasklet_struct rcu_tasklet;
86 struct rcu_head *nextlist; 85 struct rcu_head *nextlist;
87 struct rcu_head **nexttail; 86 struct rcu_head **nexttail;
88 struct rcu_head *waitlist[GP_STAGES]; 87 struct rcu_head *waitlist[GP_STAGES];
89 struct rcu_head **waittail[GP_STAGES]; 88 struct rcu_head **waittail[GP_STAGES];
90 struct rcu_head *donelist; 89 struct rcu_head *donelist; /* from waitlist & waitschedlist */
91 struct rcu_head **donetail; 90 struct rcu_head **donetail;
92 long rcu_flipctr[2]; 91 long rcu_flipctr[2];
92 struct rcu_head *nextschedlist;
93 struct rcu_head **nextschedtail;
94 struct rcu_head *waitschedlist;
95 struct rcu_head **waitschedtail;
96 int rcu_sched_sleeping;
93#ifdef CONFIG_RCU_TRACE 97#ifdef CONFIG_RCU_TRACE
94 struct rcupreempt_trace trace; 98 struct rcupreempt_trace trace;
95#endif /* #ifdef CONFIG_RCU_TRACE */ 99#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
131 rcu_try_flip_waitmb_state, 135 rcu_try_flip_waitmb_state,
132}; 136};
133 137
138/*
139 * States for rcu_ctrlblk.rcu_sched_sleep.
140 */
141
142enum rcu_sched_sleep_states {
143 rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
144 rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
145 rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
146};
147
134struct rcu_ctrlblk { 148struct rcu_ctrlblk {
135 spinlock_t fliplock; /* Protect state-machine transitions. */ 149 spinlock_t fliplock; /* Protect state-machine transitions. */
136 long completed; /* Number of last completed batch. */ 150 long completed; /* Number of last completed batch. */
137 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of 151 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
138 the rcu state machine */ 152 the rcu state machine */
153 spinlock_t schedlock; /* Protect rcu_sched sleep state. */
154 enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
155 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
139}; 156};
140 157
141static DEFINE_PER_CPU(struct rcu_data, rcu_data); 158static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
143 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), 160 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
144 .completed = 0, 161 .completed = 0,
145 .rcu_try_flip_state = rcu_try_flip_idle_state, 162 .rcu_try_flip_state = rcu_try_flip_idle_state,
163 .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
164 .sched_sleep = rcu_sched_not_sleeping,
165 .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
146}; 166};
147 167
168static struct task_struct *rcu_sched_grace_period_task;
148 169
149#ifdef CONFIG_RCU_TRACE 170#ifdef CONFIG_RCU_TRACE
150static char *rcu_try_flip_state_names[] = 171static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
207 */ 228 */
208#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); 229#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
209 230
231#define RCU_SCHED_BATCH_TIME (HZ / 50)
232
210/* 233/*
211 * Return the number of RCU batches processed thus far. Useful 234 * Return the number of RCU batches processed thus far. Useful
212 * for debug and statistics. 235 * for debug and statistics.
@@ -217,8 +240,6 @@ long rcu_batches_completed(void)
217} 240}
218EXPORT_SYMBOL_GPL(rcu_batches_completed); 241EXPORT_SYMBOL_GPL(rcu_batches_completed);
219 242
220EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
221
222void __rcu_read_lock(void) 243void __rcu_read_lock(void)
223{ 244{
224 int idx; 245 int idx;
@@ -413,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
413 } 434 }
414} 435}
415 436
416#ifdef CONFIG_NO_HZ 437DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
438 .dynticks = 1,
439};
417 440
418DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; 441#ifdef CONFIG_NO_HZ
419static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
420static DEFINE_PER_CPU(int, rcu_update_flag); 442static DEFINE_PER_CPU(int, rcu_update_flag);
421 443
422/** 444/**
423 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. 445 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
424 * 446 *
425 * If the CPU was idle with dynamic ticks active, this updates the 447 * If the CPU was idle with dynamic ticks active, this updates the
426 * dynticks_progress_counter to let the RCU handling know that the 448 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
427 * CPU is active. 449 * CPU is active.
428 */ 450 */
429void rcu_irq_enter(void) 451void rcu_irq_enter(void)
430{ 452{
431 int cpu = smp_processor_id(); 453 int cpu = smp_processor_id();
454 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
432 455
433 if (per_cpu(rcu_update_flag, cpu)) 456 if (per_cpu(rcu_update_flag, cpu))
434 per_cpu(rcu_update_flag, cpu)++; 457 per_cpu(rcu_update_flag, cpu)++;
435 458
436 /* 459 /*
437 * Only update if we are coming from a stopped ticks mode 460 * Only update if we are coming from a stopped ticks mode
438 * (dynticks_progress_counter is even). 461 * (rcu_dyntick_sched.dynticks is even).
439 */ 462 */
440 if (!in_interrupt() && 463 if (!in_interrupt() &&
441 (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { 464 (rdssp->dynticks & 0x1) == 0) {
442 /* 465 /*
443 * The following might seem like we could have a race 466 * The following might seem like we could have a race
444 * with NMI/SMIs. But this really isn't a problem. 467 * with NMI/SMIs. But this really isn't a problem.
@@ -461,12 +484,12 @@ void rcu_irq_enter(void)
461 * RCU read-side critical sections on this CPU would 484 * RCU read-side critical sections on this CPU would
462 * have already completed. 485 * have already completed.
463 */ 486 */
464 per_cpu(dynticks_progress_counter, cpu)++; 487 rdssp->dynticks++;
465 /* 488 /*
466 * The following memory barrier ensures that any 489 * The following memory barrier ensures that any
467 * rcu_read_lock() primitives in the irq handler 490 * rcu_read_lock() primitives in the irq handler
468 * are seen by other CPUs to follow the above 491 * are seen by other CPUs to follow the above
469 * increment to dynticks_progress_counter. This is 492 * increment to rcu_dyntick_sched.dynticks. This is
470 * required in order for other CPUs to correctly 493 * required in order for other CPUs to correctly
471 * determine when it is safe to advance the RCU 494 * determine when it is safe to advance the RCU
472 * grace-period state machine. 495 * grace-period state machine.
@@ -474,7 +497,7 @@ void rcu_irq_enter(void)
474 smp_mb(); /* see above block comment. */ 497 smp_mb(); /* see above block comment. */
475 /* 498 /*
476 * Since we can't determine the dynamic tick mode from 499 * Since we can't determine the dynamic tick mode from
477 * the dynticks_progress_counter after this routine, 500 * the rcu_dyntick_sched.dynticks after this routine,
478 * we use a second flag to acknowledge that we came 501 * we use a second flag to acknowledge that we came
479 * from an idle state with ticks stopped. 502 * from an idle state with ticks stopped.
480 */ 503 */
@@ -482,7 +505,7 @@ void rcu_irq_enter(void)
482 /* 505 /*
483 * If we take an NMI/SMI now, they will also increment 506 * If we take an NMI/SMI now, they will also increment
484 * the rcu_update_flag, and will not update the 507 * the rcu_update_flag, and will not update the
485 * dynticks_progress_counter on exit. That is for 508 * rcu_dyntick_sched.dynticks on exit. That is for
486 * this IRQ to do. 509 * this IRQ to do.
487 */ 510 */
488 } 511 }
@@ -492,12 +515,13 @@ void rcu_irq_enter(void)
492 * rcu_irq_exit - Called from exiting Hard irq context. 515 * rcu_irq_exit - Called from exiting Hard irq context.
493 * 516 *
494 * If the CPU was idle with dynamic ticks active, update the 517 * If the CPU was idle with dynamic ticks active, update the
 495 * dynticks_progress_counter to let the RCU handling be 518 * rcu_dyntick_sched.dynticks to let the RCU handling be
496 * aware that the CPU is going back to idle with no ticks. 519 * aware that the CPU is going back to idle with no ticks.
497 */ 520 */
498void rcu_irq_exit(void) 521void rcu_irq_exit(void)
499{ 522{
500 int cpu = smp_processor_id(); 523 int cpu = smp_processor_id();
524 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
501 525
502 /* 526 /*
503 * rcu_update_flag is set if we interrupted the CPU 527 * rcu_update_flag is set if we interrupted the CPU
@@ -505,7 +529,7 @@ void rcu_irq_exit(void)
505 * Once this occurs, we keep track of interrupt nesting 529 * Once this occurs, we keep track of interrupt nesting
506 * because a NMI/SMI could also come in, and we still 530 * because a NMI/SMI could also come in, and we still
507 * only want the IRQ that started the increment of the 531 * only want the IRQ that started the increment of the
508 * dynticks_progress_counter to be the one that modifies 532 * rcu_dyntick_sched.dynticks to be the one that modifies
509 * it on exit. 533 * it on exit.
510 */ 534 */
511 if (per_cpu(rcu_update_flag, cpu)) { 535 if (per_cpu(rcu_update_flag, cpu)) {
@@ -517,28 +541,29 @@ void rcu_irq_exit(void)
517 541
518 /* 542 /*
519 * If an NMI/SMI happens now we are still 543 * If an NMI/SMI happens now we are still
520 * protected by the dynticks_progress_counter being odd. 544 * protected by the rcu_dyntick_sched.dynticks being odd.
521 */ 545 */
522 546
523 /* 547 /*
524 * The following memory barrier ensures that any 548 * The following memory barrier ensures that any
525 * rcu_read_unlock() primitives in the irq handler 549 * rcu_read_unlock() primitives in the irq handler
 526 * are seen by other CPUs to precede the following 550 * are seen by other CPUs to precede the following
527 * increment to dynticks_progress_counter. This 551 * increment to rcu_dyntick_sched.dynticks. This
528 * is required in order for other CPUs to determine 552 * is required in order for other CPUs to determine
529 * when it is safe to advance the RCU grace-period 553 * when it is safe to advance the RCU grace-period
530 * state machine. 554 * state machine.
531 */ 555 */
532 smp_mb(); /* see above block comment. */ 556 smp_mb(); /* see above block comment. */
533 per_cpu(dynticks_progress_counter, cpu)++; 557 rdssp->dynticks++;
534 WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); 558 WARN_ON(rdssp->dynticks & 0x1);
535 } 559 }
536} 560}
537 561
538static void dyntick_save_progress_counter(int cpu) 562static void dyntick_save_progress_counter(int cpu)
539{ 563{
540 per_cpu(rcu_dyntick_snapshot, cpu) = 564 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
541 per_cpu(dynticks_progress_counter, cpu); 565
566 rdssp->dynticks_snap = rdssp->dynticks;
542} 567}
543 568
544static inline int 569static inline int
@@ -546,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
546{ 571{
547 long curr; 572 long curr;
548 long snap; 573 long snap;
574 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
549 575
550 curr = per_cpu(dynticks_progress_counter, cpu); 576 curr = rdssp->dynticks;
551 snap = per_cpu(rcu_dyntick_snapshot, cpu); 577 snap = rdssp->dynticks_snap;
552 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 578 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
553 579
554 /* 580 /*
@@ -569,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
569 * that this CPU already acknowledged the counter. 595 * that this CPU already acknowledged the counter.
570 */ 596 */
571 597
572 if ((curr - snap) > 2 || (snap & 0x1) == 0) 598 if ((curr - snap) > 2 || (curr & 0x1) == 0)
573 return 0; 599 return 0;
574 600
575 /* We need this CPU to explicitly acknowledge the counter flip. */ 601 /* We need this CPU to explicitly acknowledge the counter flip. */
@@ -582,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
582{ 608{
583 long curr; 609 long curr;
584 long snap; 610 long snap;
611 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
585 612
586 curr = per_cpu(dynticks_progress_counter, cpu); 613 curr = rdssp->dynticks;
587 snap = per_cpu(rcu_dyntick_snapshot, cpu); 614 snap = rdssp->dynticks_snap;
588 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 615 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
589 616
590 /* 617 /*
@@ -611,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
611 return 1; 638 return 1;
612} 639}
613 640
641static void dyntick_save_progress_counter_sched(int cpu)
642{
643 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
644
645 rdssp->sched_dynticks_snap = rdssp->dynticks;
646}
647
648static int rcu_qsctr_inc_needed_dyntick(int cpu)
649{
650 long curr;
651 long snap;
652 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
653
654 curr = rdssp->dynticks;
655 snap = rdssp->sched_dynticks_snap;
656 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
657
658 /*
659 * If the CPU remained in dynticks mode for the entire time
660 * and didn't take any interrupts, NMIs, SMIs, or whatever,
661 * then it cannot be in the middle of an rcu_read_lock(), so
662 * the next rcu_read_lock() it executes must use the new value
663 * of the counter. Therefore, this CPU has been in a quiescent
664 * state the entire time, and we don't need to wait for it.
665 */
666
667 if ((curr == snap) && ((curr & 0x1) == 0))
668 return 0;
669
670 /*
671 * If the CPU passed through or entered a dynticks idle phase with
672 * no active irq handlers, then, as above, this CPU has already
673 * passed through a quiescent state.
674 */
675
676 if ((curr - snap) > 2 || (snap & 0x1) == 0)
677 return 0;
678
679 /* We need this CPU to go through a quiescent state. */
680
681 return 1;
682}
683
614#else /* !CONFIG_NO_HZ */ 684#else /* !CONFIG_NO_HZ */
615 685
616# define dyntick_save_progress_counter(cpu) do { } while (0) 686# define dyntick_save_progress_counter(cpu) do { } while (0)
617# define rcu_try_flip_waitack_needed(cpu) (1) 687# define rcu_try_flip_waitack_needed(cpu) (1)
618# define rcu_try_flip_waitmb_needed(cpu) (1) 688# define rcu_try_flip_waitmb_needed(cpu) (1)
689
690# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
691# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
619 692
620#endif /* CONFIG_NO_HZ */ 693#endif /* CONFIG_NO_HZ */
621 694
695static void save_qsctr_sched(int cpu)
696{
697 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
698
699 rdssp->sched_qs_snap = rdssp->sched_qs;
700}
701
702static inline int rcu_qsctr_inc_needed(int cpu)
703{
704 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
705
706 /*
707 * If there has been a quiescent state, no more need to wait
708 * on this CPU.
709 */
710
711 if (rdssp->sched_qs != rdssp->sched_qs_snap) {
712 smp_mb(); /* force ordering with cpu entering schedule(). */
713 return 0;
714 }
715
716 /* We need this CPU to go through a quiescent state. */
717
718 return 1;
719}
720
622/* 721/*
623 * Get here when RCU is idle. Decide whether we need to 722 * Get here when RCU is idle. Decide whether we need to
624 * move out of idle state, and return non-zero if so. 723 * move out of idle state, and return non-zero if so.
@@ -821,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
821 unsigned long flags; 920 unsigned long flags;
822 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 921 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
823 922
923 /*
924 * If this CPU took its interrupt from user mode or from the
925 * idle loop, and this is not a nested interrupt, then
 926 * this CPU has to have exited all prior preempt-disable
927 * sections of code. So increment the counter to note this.
928 *
929 * The memory barrier is needed to handle the case where
930 * writes from a preempt-disable section of code get reordered
931 * into schedule() by this CPU's write buffer. So the memory
932 * barrier makes sure that the rcu_qsctr_inc() is seen by other
933 * CPUs to happen after any such write.
934 */
935
936 if (user ||
937 (idle_cpu(cpu) && !in_softirq() &&
938 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
939 smp_mb(); /* Guard against aggressive schedule(). */
940 rcu_qsctr_inc(cpu);
941 }
942
824 rcu_check_mb(cpu); 943 rcu_check_mb(cpu);
825 if (rcu_ctrlblk.completed == rdp->completed) 944 if (rcu_ctrlblk.completed == rdp->completed)
826 rcu_try_flip(); 945 rcu_try_flip();
@@ -871,6 +990,8 @@ void rcu_offline_cpu(int cpu)
871 struct rcu_head *list = NULL; 990 struct rcu_head *list = NULL;
872 unsigned long flags; 991 unsigned long flags;
873 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 992 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
993 struct rcu_head *schedlist = NULL;
994 struct rcu_head **schedtail = &schedlist;
874 struct rcu_head **tail = &list; 995 struct rcu_head **tail = &list;
875 996
876 /* 997 /*
@@ -884,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
884 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], 1005 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
885 list, tail); 1006 list, tail);
886 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); 1007 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
1008 rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
1009 schedlist, schedtail);
1010 rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
1011 schedlist, schedtail);
1012 rdp->rcu_sched_sleeping = 0;
887 spin_unlock_irqrestore(&rdp->lock, flags); 1013 spin_unlock_irqrestore(&rdp->lock, flags);
888 rdp->waitlistcount = 0; 1014 rdp->waitlistcount = 0;
889 1015
@@ -918,22 +1044,40 @@ void rcu_offline_cpu(int cpu)
918 * fix. 1044 * fix.
919 */ 1045 */
920 1046
921 local_irq_save(flags); 1047 local_irq_save(flags); /* disable preempt till we know what lock. */
922 rdp = RCU_DATA_ME(); 1048 rdp = RCU_DATA_ME();
923 spin_lock(&rdp->lock); 1049 spin_lock(&rdp->lock);
924 *rdp->nexttail = list; 1050 *rdp->nexttail = list;
925 if (list) 1051 if (list)
926 rdp->nexttail = tail; 1052 rdp->nexttail = tail;
1053 *rdp->nextschedtail = schedlist;
1054 if (schedlist)
1055 rdp->nextschedtail = schedtail;
927 spin_unlock_irqrestore(&rdp->lock, flags); 1056 spin_unlock_irqrestore(&rdp->lock, flags);
928} 1057}
929 1058
930void __devinit rcu_online_cpu(int cpu) 1059void __devinit rcu_online_cpu(int cpu)
931{ 1060{
932 unsigned long flags; 1061 unsigned long flags;
1062 struct rcu_data *rdp;
933 1063
934 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); 1064 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
935 cpu_set(cpu, rcu_cpu_online_map); 1065 cpu_set(cpu, rcu_cpu_online_map);
936 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); 1066 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
1067
1068 /*
1069 * The rcu_sched grace-period processing might have bypassed
1070 * this CPU, given that it was not in the rcu_cpu_online_map
1071 * when the grace-period scan started. This means that the
1072 * grace-period task might sleep. So make sure that if this
1073 * should happen, the first callback posted to this CPU will
1074 * wake up the grace-period task if need be.
1075 */
1076
1077 rdp = RCU_DATA_CPU(cpu);
1078 spin_lock_irqsave(&rdp->lock, flags);
1079 rdp->rcu_sched_sleeping = 1;
1080 spin_unlock_irqrestore(&rdp->lock, flags);
937} 1081}
938 1082
939#else /* #ifdef CONFIG_HOTPLUG_CPU */ 1083#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -988,31 +1132,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
988 *rdp->nexttail = head; 1132 *rdp->nexttail = head;
989 rdp->nexttail = &head->next; 1133 rdp->nexttail = &head->next;
990 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); 1134 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
991 spin_unlock(&rdp->lock); 1135 spin_unlock_irqrestore(&rdp->lock, flags);
992 local_irq_restore(flags);
993} 1136}
994EXPORT_SYMBOL_GPL(call_rcu); 1137EXPORT_SYMBOL_GPL(call_rcu);
995 1138
1139void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1140{
1141 unsigned long flags;
1142 struct rcu_data *rdp;
1143 int wake_gp = 0;
1144
1145 head->func = func;
1146 head->next = NULL;
1147 local_irq_save(flags);
1148 rdp = RCU_DATA_ME();
1149 spin_lock(&rdp->lock);
1150 *rdp->nextschedtail = head;
1151 rdp->nextschedtail = &head->next;
1152 if (rdp->rcu_sched_sleeping) {
1153
1154 /* Grace-period processing might be sleeping... */
1155
1156 rdp->rcu_sched_sleeping = 0;
1157 wake_gp = 1;
1158 }
1159 spin_unlock_irqrestore(&rdp->lock, flags);
1160 if (wake_gp) {
1161
1162 /* Wake up grace-period processing, unless someone beat us. */
1163
1164 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1165 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
1166 wake_gp = 0;
1167 rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
1168 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1169 if (wake_gp)
1170 wake_up_interruptible(&rcu_ctrlblk.sched_wq);
1171 }
1172}
1173EXPORT_SYMBOL_GPL(call_rcu_sched);
1174
996/* 1175/*
997 * Wait until all currently running preempt_disable() code segments 1176 * Wait until all currently running preempt_disable() code segments
998 * (including hardware-irq-disable segments) complete. Note that 1177 * (including hardware-irq-disable segments) complete. Note that
999 * in -rt this does -not- necessarily result in all currently executing 1178 * in -rt this does -not- necessarily result in all currently executing
1000 * interrupt -handlers- having completed. 1179 * interrupt -handlers- having completed.
1001 */ 1180 */
1002void __synchronize_sched(void) 1181synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
1182EXPORT_SYMBOL_GPL(__synchronize_sched);
1183
1184/*
1185 * kthread function that manages call_rcu_sched grace periods.
1186 */
1187static int rcu_sched_grace_period(void *arg)
1003{ 1188{
1004 cpumask_t oldmask; 1189 int couldsleep; /* might sleep after current pass. */
1190 int couldsleepnext = 0; /* might sleep after next pass. */
1005 int cpu; 1191 int cpu;
1192 unsigned long flags;
1193 struct rcu_data *rdp;
1194 int ret;
1006 1195
1007 if (sched_getaffinity(0, &oldmask) < 0) 1196 /*
1008 oldmask = cpu_possible_map; 1197 * Each pass through the following loop handles one
1009 for_each_online_cpu(cpu) { 1198 * rcu_sched grace period cycle.
1010 sched_setaffinity(0, &cpumask_of_cpu(cpu)); 1199 */
1011 schedule(); 1200 do {
1012 } 1201 /* Save each CPU's current state. */
1013 sched_setaffinity(0, &oldmask); 1202
1203 for_each_online_cpu(cpu) {
1204 dyntick_save_progress_counter_sched(cpu);
1205 save_qsctr_sched(cpu);
1206 }
1207
1208 /*
1209 * Sleep for about an RCU grace-period's worth to
1210 * allow better batching and to consume less CPU.
1211 */
1212 schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
1213
1214 /*
1215 * If there was nothing to do last time, prepare to
1216 * sleep at the end of the current grace period cycle.
1217 */
1218 couldsleep = couldsleepnext;
1219 couldsleepnext = 1;
1220 if (couldsleep) {
1221 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1222 rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
1223 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1224 }
1225
1226 /*
1227 * Wait on each CPU in turn to have either visited
1228 * a quiescent state or been in dynticks-idle mode.
1229 */
1230 for_each_online_cpu(cpu) {
1231 while (rcu_qsctr_inc_needed(cpu) &&
1232 rcu_qsctr_inc_needed_dyntick(cpu)) {
1233 /* resched_cpu(cpu); @@@ */
1234 schedule_timeout_interruptible(1);
1235 }
1236 }
1237
1238 /* Advance callbacks for each CPU. */
1239
1240 for_each_online_cpu(cpu) {
1241
1242 rdp = RCU_DATA_CPU(cpu);
1243 spin_lock_irqsave(&rdp->lock, flags);
1244
1245 /*
1246 * We are running on this CPU irq-disabled, so no
1247 * CPU can go offline until we re-enable irqs.
1248 * The current CPU might have already gone
 1249 * offline (between the for_each_online_cpu and
1250 * the spin_lock_irqsave), but in that case all its
1251 * callback lists will be empty, so no harm done.
1252 *
1253 * Advance the callbacks! We share normal RCU's
1254 * donelist, since callbacks are invoked the
1255 * same way in either case.
1256 */
1257 if (rdp->waitschedlist != NULL) {
1258 *rdp->donetail = rdp->waitschedlist;
1259 rdp->donetail = rdp->waitschedtail;
1260
1261 /*
1262 * Next rcu_check_callbacks() will
1263 * do the required raise_softirq().
1264 */
1265 }
1266 if (rdp->nextschedlist != NULL) {
1267 rdp->waitschedlist = rdp->nextschedlist;
1268 rdp->waitschedtail = rdp->nextschedtail;
1269 couldsleep = 0;
1270 couldsleepnext = 0;
1271 } else {
1272 rdp->waitschedlist = NULL;
1273 rdp->waitschedtail = &rdp->waitschedlist;
1274 }
1275 rdp->nextschedlist = NULL;
1276 rdp->nextschedtail = &rdp->nextschedlist;
1277
1278 /* Mark sleep intention. */
1279
1280 rdp->rcu_sched_sleeping = couldsleep;
1281
1282 spin_unlock_irqrestore(&rdp->lock, flags);
1283 }
1284
1285 /* If we saw callbacks on the last scan, go deal with them. */
1286
1287 if (!couldsleep)
1288 continue;
1289
1290 /* Attempt to block... */
1291
1292 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1293 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
1294
1295 /*
1296 * Someone posted a callback after we scanned.
1297 * Go take care of it.
1298 */
1299 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1300 couldsleepnext = 0;
1301 continue;
1302 }
1303
1304 /* Block until the next person posts a callback. */
1305
1306 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
1307 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1308 ret = 0;
1309 __wait_event_interruptible(rcu_ctrlblk.sched_wq,
1310 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
1311 ret);
1312
1313 /*
1314 * Signals would prevent us from sleeping, and we cannot
1315 * do much with them in any case. So flush them.
1316 */
1317 if (ret)
1318 flush_signals(current);
1319 couldsleepnext = 0;
1320
1321 } while (!kthread_should_stop());
1322
1323 return (0);
1014} 1324}
1015EXPORT_SYMBOL_GPL(__synchronize_sched);
1016 1325
1017/* 1326/*
1018 * Check to see if any future RCU-related work will need to be done 1327 * Check to see if any future RCU-related work will need to be done
@@ -1029,7 +1338,9 @@ int rcu_needs_cpu(int cpu)
1029 1338
1030 return (rdp->donelist != NULL || 1339 return (rdp->donelist != NULL ||
1031 !!rdp->waitlistcount || 1340 !!rdp->waitlistcount ||
1032 rdp->nextlist != NULL); 1341 rdp->nextlist != NULL ||
1342 rdp->nextschedlist != NULL ||
1343 rdp->waitschedlist != NULL);
1033} 1344}
1034 1345
1035int rcu_pending(int cpu) 1346int rcu_pending(int cpu)
@@ -1040,7 +1351,9 @@ int rcu_pending(int cpu)
1040 1351
1041 if (rdp->donelist != NULL || 1352 if (rdp->donelist != NULL ||
1042 !!rdp->waitlistcount || 1353 !!rdp->waitlistcount ||
1043 rdp->nextlist != NULL) 1354 rdp->nextlist != NULL ||
1355 rdp->nextschedlist != NULL ||
1356 rdp->waitschedlist != NULL)
1044 return 1; 1357 return 1;
1045 1358
1046 /* The RCU core needs an acknowledgement from this CPU. */ 1359 /* The RCU core needs an acknowledgement from this CPU. */
@@ -1107,6 +1420,11 @@ void __init __rcu_init(void)
1107 rdp->donetail = &rdp->donelist; 1420 rdp->donetail = &rdp->donelist;
1108 rdp->rcu_flipctr[0] = 0; 1421 rdp->rcu_flipctr[0] = 0;
1109 rdp->rcu_flipctr[1] = 0; 1422 rdp->rcu_flipctr[1] = 0;
1423 rdp->nextschedlist = NULL;
1424 rdp->nextschedtail = &rdp->nextschedlist;
1425 rdp->waitschedlist = NULL;
1426 rdp->waitschedtail = &rdp->waitschedlist;
1427 rdp->rcu_sched_sleeping = 0;
1110 } 1428 }
1111 register_cpu_notifier(&rcu_nb); 1429 register_cpu_notifier(&rcu_nb);
1112 1430
@@ -1129,11 +1447,15 @@ void __init __rcu_init(void)
1129} 1447}
1130 1448
1131/* 1449/*
1132 * Deprecated, use synchronize_rcu() or synchronize_sched() instead. 1450 * Late-boot-time RCU initialization that must wait until after scheduler
1451 * has been initialized.
1133 */ 1452 */
1134void synchronize_kernel(void) 1453void __init rcu_init_sched(void)
1135{ 1454{
1136 synchronize_rcu(); 1455 rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
1456 NULL,
1457 "rcu_sched_grace_period");
1458 WARN_ON(IS_ERR(rcu_sched_grace_period_task));
1137} 1459}
1138 1460
1139#ifdef CONFIG_RCU_TRACE 1461#ifdef CONFIG_RCU_TRACE
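
The sleep/wake handshake between call_rcu_sched() and the rcu_sched_grace_period() kthread above boils down to a small pattern. The sketch below (illustrative only; names and the "work" itself are placeholders, and it omits the per-CPU lists and dyntick checks) shows the same two-phase idea: the poster always cancels the sleep state and wakes the kthread only if it was actually sleeping, while the kthread announces its intention to sleep, rescans, and blocks only if nothing arrived in the meantime.

	#include <linux/kthread.h>
	#include <linux/sched.h>
	#include <linux/spinlock.h>
	#include <linux/wait.h>

	enum sleep_state { NOT_SLEEPING, SLEEP_PREP, SLEEPING };

	static DEFINE_SPINLOCK(state_lock);
	static enum sleep_state state = NOT_SLEEPING;
	static DECLARE_WAIT_QUEUE_HEAD(sleep_wq);

	/* Work-poster side (compare call_rcu_sched() above). */
	static void post_work(void)
	{
		unsigned long flags;
		int wake;

		spin_lock_irqsave(&state_lock, flags);
		wake = (state == SLEEPING);	/* wake only a sleeper... */
		state = NOT_SLEEPING;		/* ...always cancel the sleep. */
		spin_unlock_irqrestore(&state_lock, flags);
		if (wake)
			wake_up_interruptible(&sleep_wq);
	}

	/* Kthread side (compare rcu_sched_grace_period() above). */
	static int worker(void *unused)
	{
		unsigned long flags;

		do {
			/* ... scan for and process outstanding work ... */

			/* Announce the intention to sleep, then rescan. */
			spin_lock_irqsave(&state_lock, flags);
			state = SLEEP_PREP;
			spin_unlock_irqrestore(&state_lock, flags);

			/* ... rescan; any work found cancels the sleep ... */

			spin_lock_irqsave(&state_lock, flags);
			if (state != SLEEP_PREP) {
				/* Work was posted after the rescan. */
				spin_unlock_irqrestore(&state_lock, flags);
				continue;
			}
			state = SLEEPING;
			spin_unlock_irqrestore(&state_lock, flags);

			if (wait_event_interruptible(sleep_wq,
						     state != SLEEPING))
				flush_signals(current);	/* signals just end the nap */
		} while (!kthread_should_stop());
		return 0;
	}
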
diff --git a/kernel/rcupreempt_trace.c b/kernel/rcupreempt_trace.c
index 49ac4947af24..5edf82c34bbc 100644
--- a/kernel/rcupreempt_trace.c
+++ b/kernel/rcupreempt_trace.c
@@ -38,7 +38,6 @@
38#include <linux/moduleparam.h> 38#include <linux/moduleparam.h>
39#include <linux/percpu.h> 39#include <linux/percpu.h>
40#include <linux/notifier.h> 40#include <linux/notifier.h>
41#include <linux/rcupdate.h>
42#include <linux/cpu.h> 41#include <linux/cpu.h>
43#include <linux/mutex.h> 42#include <linux/mutex.h>
44#include <linux/rcupreempt_trace.h> 43#include <linux/rcupreempt_trace.h>
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 33acc424667e..0334b6a8baca 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -192,6 +192,7 @@ struct rcu_torture_ops {
192 int (*completed)(void); 192 int (*completed)(void);
193 void (*deferredfree)(struct rcu_torture *p); 193 void (*deferredfree)(struct rcu_torture *p);
194 void (*sync)(void); 194 void (*sync)(void);
195 void (*cb_barrier)(void);
195 int (*stats)(char *page); 196 int (*stats)(char *page);
196 char *name; 197 char *name;
197}; 198};
@@ -265,6 +266,7 @@ static struct rcu_torture_ops rcu_ops = {
265 .completed = rcu_torture_completed, 266 .completed = rcu_torture_completed,
266 .deferredfree = rcu_torture_deferred_free, 267 .deferredfree = rcu_torture_deferred_free,
267 .sync = synchronize_rcu, 268 .sync = synchronize_rcu,
269 .cb_barrier = rcu_barrier,
268 .stats = NULL, 270 .stats = NULL,
269 .name = "rcu" 271 .name = "rcu"
270}; 272};
@@ -304,6 +306,7 @@ static struct rcu_torture_ops rcu_sync_ops = {
304 .completed = rcu_torture_completed, 306 .completed = rcu_torture_completed,
305 .deferredfree = rcu_sync_torture_deferred_free, 307 .deferredfree = rcu_sync_torture_deferred_free,
306 .sync = synchronize_rcu, 308 .sync = synchronize_rcu,
309 .cb_barrier = NULL,
307 .stats = NULL, 310 .stats = NULL,
308 .name = "rcu_sync" 311 .name = "rcu_sync"
309}; 312};
@@ -364,6 +367,7 @@ static struct rcu_torture_ops rcu_bh_ops = {
364 .completed = rcu_bh_torture_completed, 367 .completed = rcu_bh_torture_completed,
365 .deferredfree = rcu_bh_torture_deferred_free, 368 .deferredfree = rcu_bh_torture_deferred_free,
366 .sync = rcu_bh_torture_synchronize, 369 .sync = rcu_bh_torture_synchronize,
370 .cb_barrier = rcu_barrier_bh,
367 .stats = NULL, 371 .stats = NULL,
368 .name = "rcu_bh" 372 .name = "rcu_bh"
369}; 373};
@@ -377,6 +381,7 @@ static struct rcu_torture_ops rcu_bh_sync_ops = {
377 .completed = rcu_bh_torture_completed, 381 .completed = rcu_bh_torture_completed,
378 .deferredfree = rcu_sync_torture_deferred_free, 382 .deferredfree = rcu_sync_torture_deferred_free,
379 .sync = rcu_bh_torture_synchronize, 383 .sync = rcu_bh_torture_synchronize,
384 .cb_barrier = NULL,
380 .stats = NULL, 385 .stats = NULL,
381 .name = "rcu_bh_sync" 386 .name = "rcu_bh_sync"
382}; 387};
@@ -458,6 +463,7 @@ static struct rcu_torture_ops srcu_ops = {
458 .completed = srcu_torture_completed, 463 .completed = srcu_torture_completed,
459 .deferredfree = rcu_sync_torture_deferred_free, 464 .deferredfree = rcu_sync_torture_deferred_free,
460 .sync = srcu_torture_synchronize, 465 .sync = srcu_torture_synchronize,
466 .cb_barrier = NULL,
461 .stats = srcu_torture_stats, 467 .stats = srcu_torture_stats,
462 .name = "srcu" 468 .name = "srcu"
463}; 469};
@@ -482,6 +488,11 @@ static int sched_torture_completed(void)
482 return 0; 488 return 0;
483} 489}
484 490
491static void rcu_sched_torture_deferred_free(struct rcu_torture *p)
492{
493 call_rcu_sched(&p->rtort_rcu, rcu_torture_cb);
494}
495
485static void sched_torture_synchronize(void) 496static void sched_torture_synchronize(void)
486{ 497{
487 synchronize_sched(); 498 synchronize_sched();
@@ -494,12 +505,27 @@ static struct rcu_torture_ops sched_ops = {
494 .readdelay = rcu_read_delay, /* just reuse rcu's version. */ 505 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
495 .readunlock = sched_torture_read_unlock, 506 .readunlock = sched_torture_read_unlock,
496 .completed = sched_torture_completed, 507 .completed = sched_torture_completed,
497 .deferredfree = rcu_sync_torture_deferred_free, 508 .deferredfree = rcu_sched_torture_deferred_free,
498 .sync = sched_torture_synchronize, 509 .sync = sched_torture_synchronize,
510 .cb_barrier = rcu_barrier_sched,
499 .stats = NULL, 511 .stats = NULL,
500 .name = "sched" 512 .name = "sched"
501}; 513};
502 514
515static struct rcu_torture_ops sched_ops_sync = {
516 .init = rcu_sync_torture_init,
517 .cleanup = NULL,
518 .readlock = sched_torture_read_lock,
519 .readdelay = rcu_read_delay, /* just reuse rcu's version. */
520 .readunlock = sched_torture_read_unlock,
521 .completed = sched_torture_completed,
522 .deferredfree = rcu_sync_torture_deferred_free,
523 .sync = sched_torture_synchronize,
524 .cb_barrier = NULL,
525 .stats = NULL,
526 .name = "sched_sync"
527};
528
503/* 529/*
504 * RCU torture writer kthread. Repeatedly substitutes a new structure 530 * RCU torture writer kthread. Repeatedly substitutes a new structure
505 * for that pointed to by rcu_torture_current, freeing the old structure 531 * for that pointed to by rcu_torture_current, freeing the old structure
@@ -848,7 +874,9 @@ rcu_torture_cleanup(void)
848 stats_task = NULL; 874 stats_task = NULL;
849 875
850 /* Wait for all RCU callbacks to fire. */ 876 /* Wait for all RCU callbacks to fire. */
851 rcu_barrier(); 877
878 if (cur_ops->cb_barrier != NULL)
879 cur_ops->cb_barrier();
852 880
853 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */ 881 rcu_torture_stats_print(); /* -After- the stats thread is stopped! */
854 882
@@ -868,7 +896,7 @@ rcu_torture_init(void)
868 int firsterr = 0; 896 int firsterr = 0;
869 static struct rcu_torture_ops *torture_ops[] = 897 static struct rcu_torture_ops *torture_ops[] =
870 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops, 898 { &rcu_ops, &rcu_sync_ops, &rcu_bh_ops, &rcu_bh_sync_ops,
871 &srcu_ops, &sched_ops, }; 899 &srcu_ops, &sched_ops, &sched_ops_sync, };
872 900
873 /* Process args and tell the world that the torturer is on the job. */ 901 /* Process args and tell the world that the torturer is on the job. */
874 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { 902 for (i = 0; i < ARRAY_SIZE(torture_ops); i++) {
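
The new .cb_barrier member makes the cleanup path flavor-aware. As a hedged sketch, an ops table for some hypothetical flavor "foo" (every identifier below is made up for illustration) would follow the same convention seen above: callback-based variants point .cb_barrier at their matching barrier so rcu_torture_cleanup() can wait for in-flight callbacks, while purely synchronous variants leave it NULL, as rcu_sync_ops and sched_ops_sync do.

	static struct rcu_torture_ops foo_ops = {
		.init = rcu_sync_torture_init,
		.cleanup = NULL,
		.readlock = foo_torture_read_lock,
		.readdelay = rcu_read_delay,	/* just reuse rcu's version. */
		.readunlock = foo_torture_read_unlock,
		.completed = foo_torture_completed,
		.deferredfree = foo_torture_deferred_free,
		.sync = synchronize_foo,
		.cb_barrier = rcu_barrier_foo,	/* waits for call_rcu_foo() callbacks */
		.stats = NULL,
		.name = "foo"
	};
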
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..f51ba2fa2662 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -531,13 +531,14 @@ config BOOT_PRINTK_DELAY
531config RCU_TORTURE_TEST 531config RCU_TORTURE_TEST
532 tristate "torture tests for RCU" 532 tristate "torture tests for RCU"
533 depends on DEBUG_KERNEL 533 depends on DEBUG_KERNEL
534 depends on m
535 default n 534 default n
536 help 535 help
537 This option provides a kernel module that runs torture tests 536 This option provides a kernel module that runs torture tests
538 on the RCU infrastructure. The kernel module may be built 537 on the RCU infrastructure. The kernel module may be built
539 after the fact on the running kernel to be tested, if desired. 538 after the fact on the running kernel to be tested, if desired.
540 539
540 Say Y here if you want RCU torture tests to start automatically
541 at boot time (you probably don't).
541 Say M if you want the RCU torture tests to build as a module. 542 Say M if you want the RCU torture tests to build as a module.
542 Say N if you are unsure. 543 Say N if you are unsure.
543 544
diff --git a/lib/textsearch.c b/lib/textsearch.c
index be8bda3862f5..a3e500ad51d7 100644
--- a/lib/textsearch.c
+++ b/lib/textsearch.c
@@ -97,6 +97,7 @@
97#include <linux/types.h> 97#include <linux/types.h>
98#include <linux/string.h> 98#include <linux/string.h>
99#include <linux/init.h> 99#include <linux/init.h>
100#include <linux/rculist.h>
100#include <linux/rcupdate.h> 101#include <linux/rcupdate.h>
101#include <linux/err.h> 102#include <linux/err.h>
102#include <linux/textsearch.h> 103#include <linux/textsearch.h>
diff --git a/net/802/psnap.c b/net/802/psnap.c
index 31128cb92a23..ea4643931446 100644
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -20,6 +20,7 @@
20#include <linux/mm.h> 20#include <linux/mm.h>
21#include <linux/in.h> 21#include <linux/in.h>
22#include <linux/init.h> 22#include <linux/init.h>
23#include <linux/rculist.h>
23 24
24static LIST_HEAD(snap_list); 25static LIST_HEAD(snap_list);
25static DEFINE_SPINLOCK(snap_lock); 26static DEFINE_SPINLOCK(snap_lock);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index ab2225da0ee2..08f14f6c5fd6 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -27,6 +27,7 @@
27#include <linux/mm.h> 27#include <linux/mm.h>
28#include <linux/in.h> 28#include <linux/in.h>
29#include <linux/init.h> 29#include <linux/init.h>
30#include <linux/rculist.h>
30#include <net/p8022.h> 31#include <net/p8022.h>
31#include <net/arp.h> 32#include <net/arp.h>
32#include <linux/rtnetlink.h> 33#include <linux/rtnetlink.h>
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 72c5976a5ce3..142060f02054 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -15,6 +15,7 @@
15 15
16#include <linux/kernel.h> 16#include <linux/kernel.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/rculist.h>
18#include <linux/spinlock.h> 19#include <linux/spinlock.h>
19#include <linux/times.h> 20#include <linux/times.h>
20#include <linux/netdevice.h> 21#include <linux/netdevice.h>
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index e38034aa56f5..9e96ffcd29a3 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -13,6 +13,7 @@
13 * 2 of the License, or (at your option) any later version. 13 * 2 of the License, or (at your option) any later version.
14 */ 14 */
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/rculist.h>
16 17
17#include "br_private.h" 18#include "br_private.h"
18#include "br_private_stp.h" 19#include "br_private_stp.h"
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 7d1b11703741..8e0b4c8f62a8 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -20,6 +20,7 @@
20#include <linux/err.h> 20#include <linux/err.h>
21#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/netdevice.h> 22#include <linux/netdevice.h>
23#include <linux/rculist.h>
23 24
24#include <net/netfilter/nf_conntrack.h> 25#include <net/netfilter/nf_conntrack.h>
25#include <net/netfilter/nf_conntrack_l3proto.h> 26#include <net/netfilter/nf_conntrack_l3proto.h>
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0edefcfc5949..077bcd228799 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -18,6 +18,7 @@
18#include <linux/init.h> 18#include <linux/init.h>
19#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/kernel.h> 20#include <linux/kernel.h>
21#include <linux/rculist.h>
21#include <linux/types.h> 22#include <linux/types.h>
22#include <linux/timer.h> 23#include <linux/timer.h>
23#include <linux/skbuff.h> 24#include <linux/skbuff.h>
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 02c2f7c0b255..643c032a3a57 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -30,8 +30,7 @@
30 */ 30 */
31 31
32#include <linux/types.h> 32#include <linux/types.h>
33#include <linux/rcupdate.h> 33#include <linux/rculist.h>
34#include <linux/list.h>
35#include <linux/skbuff.h> 34#include <linux/skbuff.h>
36#include <linux/spinlock.h> 35#include <linux/spinlock.h>
37#include <linux/string.h> 36#include <linux/string.h>
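
A minimal sketch of the pattern behind this batch of one-line include changes (the list, lock, and function below are illustrative): any file using the _rcu list primitives now pulls them from <linux/rculist.h> directly rather than indirectly through <linux/list.h> and <linux/rcupdate.h>.

	#include <linux/rculist.h>
	#include <linux/spinlock.h>

	static LIST_HEAD(example_list);
	static DEFINE_SPINLOCK(example_lock);

	static void example_publish(struct list_head *new)
	{
		spin_lock(&example_lock);
		list_add_rcu(new, &example_list);	/* now declared in rculist.h */
		spin_unlock(&example_lock);
	}
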