diff options
Diffstat (limited to 'Documentation/RCU')
-rw-r--r-- | Documentation/RCU/00-INDEX | 10 | ||||
-rw-r--r-- | Documentation/RCU/NMI-RCU.txt | 39 | ||||
-rw-r--r-- | Documentation/RCU/RTFP.txt | 61 | ||||
-rw-r--r-- | Documentation/RCU/checklist.txt | 215 | ||||
-rw-r--r-- | Documentation/RCU/lockdep.txt | 91 | ||||
-rw-r--r-- | Documentation/RCU/rcu.txt | 48 | ||||
-rw-r--r-- | Documentation/RCU/stallwarn.txt | 58 | ||||
-rw-r--r-- | Documentation/RCU/torture.txt | 12 | ||||
-rw-r--r-- | Documentation/RCU/trace.txt | 254 | ||||
-rw-r--r-- | Documentation/RCU/whatisRCU.txt | 24 |
10 files changed, 447 insertions, 365 deletions
diff --git a/Documentation/RCU/00-INDEX b/Documentation/RCU/00-INDEX index 9bb62f7b89c3..71b6f500ddb9 100644 --- a/Documentation/RCU/00-INDEX +++ b/Documentation/RCU/00-INDEX | |||
@@ -6,16 +6,22 @@ checklist.txt | |||
6 | - Review Checklist for RCU Patches | 6 | - Review Checklist for RCU Patches |
7 | listRCU.txt | 7 | listRCU.txt |
8 | - Using RCU to Protect Read-Mostly Linked Lists | 8 | - Using RCU to Protect Read-Mostly Linked Lists |
9 | lockdep.txt | ||
10 | - RCU and lockdep checking | ||
9 | NMI-RCU.txt | 11 | NMI-RCU.txt |
10 | - Using RCU to Protect Dynamic NMI Handlers | 12 | - Using RCU to Protect Dynamic NMI Handlers |
13 | rcubarrier.txt | ||
14 | - RCU and Unloadable Modules | ||
15 | rculist_nulls.txt | ||
16 | - RCU list primitives for use with SLAB_DESTROY_BY_RCU | ||
11 | rcuref.txt | 17 | rcuref.txt |
12 | - Reference-count design for elements of lists/arrays protected by RCU | 18 | - Reference-count design for elements of lists/arrays protected by RCU |
13 | rcu.txt | 19 | rcu.txt |
14 | - RCU Concepts | 20 | - RCU Concepts |
15 | rcubarrier.txt | ||
16 | - Unloading modules that use RCU callbacks | ||
17 | RTFP.txt | 21 | RTFP.txt |
18 | - List of RCU papers (bibliography) going back to 1980. | 22 | - List of RCU papers (bibliography) going back to 1980. |
23 | stallwarn.txt | ||
24 | - RCU CPU stall warnings (CONFIG_RCU_CPU_STALL_DETECTOR) | ||
19 | torture.txt | 25 | torture.txt |
20 | - RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST) | 26 | - RCU Torture Test Operation (CONFIG_RCU_TORTURE_TEST) |
21 | trace.txt | 27 | trace.txt |
diff --git a/Documentation/RCU/NMI-RCU.txt b/Documentation/RCU/NMI-RCU.txt index a6d32e65d222..a8536cb88091 100644 --- a/Documentation/RCU/NMI-RCU.txt +++ b/Documentation/RCU/NMI-RCU.txt | |||
@@ -34,7 +34,7 @@ NMI handler. | |||
34 | cpu = smp_processor_id(); | 34 | cpu = smp_processor_id(); |
35 | ++nmi_count(cpu); | 35 | ++nmi_count(cpu); |
36 | 36 | ||
37 | if (!rcu_dereference(nmi_callback)(regs, cpu)) | 37 | if (!rcu_dereference_sched(nmi_callback)(regs, cpu)) |
38 | default_do_nmi(regs); | 38 | default_do_nmi(regs); |
39 | 39 | ||
40 | nmi_exit(); | 40 | nmi_exit(); |
@@ -47,12 +47,13 @@ function pointer. If this handler returns zero, do_nmi() invokes the | |||
47 | default_do_nmi() function to handle a machine-specific NMI. Finally, | 47 | default_do_nmi() function to handle a machine-specific NMI. Finally, |
48 | preemption is restored. | 48 | preemption is restored. |
49 | 49 | ||
50 | Strictly speaking, rcu_dereference() is not needed, since this code runs | 50 | In theory, rcu_dereference_sched() is not needed, since this code runs |
51 | only on i386, which does not need rcu_dereference() anyway. However, | 51 | only on i386, which in theory does not need rcu_dereference_sched() |
52 | it is a good documentation aid, particularly for anyone attempting to | 52 | anyway. However, in practice it is a good documentation aid, particularly |
53 | do something similar on Alpha. | 53 | for anyone attempting to do something similar on Alpha or on systems |
54 | with aggressive optimizing compilers. | ||
54 | 55 | ||
55 | Quick Quiz: Why might the rcu_dereference() be necessary on Alpha, | 56 | Quick Quiz: Why might the rcu_dereference_sched() be necessary on Alpha, |
56 | given that the code referenced by the pointer is read-only? | 57 | given that the code referenced by the pointer is read-only? |
57 | 58 | ||
58 | 59 | ||
@@ -99,17 +100,21 @@ invoke irq_enter() and irq_exit() on NMI entry and exit, respectively. | |||
99 | 100 | ||
100 | Answer to Quick Quiz | 101 | Answer to Quick Quiz |
101 | 102 | ||
102 | Why might the rcu_dereference() be necessary on Alpha, given | 103 | Why might the rcu_dereference_sched() be necessary on Alpha, given |
103 | that the code referenced by the pointer is read-only? | 104 | that the code referenced by the pointer is read-only? |
104 | 105 | ||
105 | Answer: The caller to set_nmi_callback() might well have | 106 | Answer: The caller to set_nmi_callback() might well have |
106 | initialized some data that is to be used by the | 107 | initialized some data that is to be used by the new NMI |
107 | new NMI handler. In this case, the rcu_dereference() | 108 | handler. In this case, the rcu_dereference_sched() would |
108 | would be needed, because otherwise a CPU that received | 109 | be needed, because otherwise a CPU that received an NMI |
109 | an NMI just after the new handler was set might see | 110 | just after the new handler was set might see the pointer |
110 | the pointer to the new NMI handler, but the old | 111 | to the new NMI handler, but the old pre-initialized |
111 | pre-initialized version of the handler's data. | 112 | version of the handler's data. |
112 | 113 | ||
113 | More important, the rcu_dereference() makes it clear | 114 | This same sad story can happen on other CPUs when using |
114 | to someone reading the code that the pointer is being | 115 | a compiler with aggressive pointer-value speculation |
115 | protected by RCU. | 116 | optimizations. |
117 | |||
118 | More important, the rcu_dereference_sched() makes it | ||
119 | clear to someone reading the code that the pointer is | ||
120 | being protected by RCU-sched. | ||
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index d2b85237c76e..5aea459e3dd6 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt | |||
@@ -25,10 +25,10 @@ to be referencing the data structure. However, this mechanism was not | |||
25 | optimized for modern computer systems, which is not surprising given | 25 | optimized for modern computer systems, which is not surprising given |
26 | that these overheads were not so expensive in the mid-80s. Nonetheless, | 26 | that these overheads were not so expensive in the mid-80s. Nonetheless, |
27 | passive serialization appears to be the first deferred-destruction | 27 | passive serialization appears to be the first deferred-destruction |
28 | mechanism to be used in production. Furthermore, the relevant patent has | 28 | mechanism to be used in production. Furthermore, the relevant patent |
29 | lapsed, so this approach may be used in non-GPL software, if desired. | 29 | has lapsed, so this approach may be used in non-GPL software, if desired. |
30 | (In contrast, use of RCU is permitted only in software licensed under | 30 | (In contrast, implementation of RCU is permitted only in software licensed |
31 | GPL. Sorry!!!) | 31 | under either GPL or LGPL. Sorry!!!) |
32 | 32 | ||
33 | In 1990, Pugh [Pugh90] noted that explicitly tracking which threads | 33 | In 1990, Pugh [Pugh90] noted that explicitly tracking which threads |
34 | were reading a given data structure permitted deferred free to operate | 34 | were reading a given data structure permitted deferred free to operate |
@@ -150,6 +150,18 @@ preemptible RCU [PaulEMcKenney2007PreemptibleRCU], and the three-part | |||
150 | LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally, | 150 | LWN "What is RCU?" series [PaulEMcKenney2007WhatIsRCUFundamentally, |
151 | PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI]. | 151 | PaulEMcKenney2008WhatIsRCUUsage, and PaulEMcKenney2008WhatIsRCUAPI]. |
152 | 152 | ||
153 | 2008 saw a journal paper on real-time RCU [DinakarGuniguntala2008IBMSysJ], | ||
154 | a history of how Linux changed RCU more than RCU changed Linux | ||
155 | [PaulEMcKenney2008RCUOSR], and a design overview of hierarchical RCU | ||
156 | [PaulEMcKenney2008HierarchicalRCU]. | ||
157 | |||
158 | 2009 introduced user-level RCU algorithms [PaulEMcKenney2009MaliciousURCU], | ||
159 | which Mathieu Desnoyers is now maintaining [MathieuDesnoyers2009URCU] | ||
160 | [MathieuDesnoyersPhD]. TINY_RCU [PaulEMcKenney2009BloatWatchRCU] made | ||
161 | its appearance, as did expedited RCU [PaulEMcKenney2009expeditedRCU]. | ||
162 | The problem of resizeable RCU-protected hash tables may now be on a path | ||
163 | to a solution [JoshTriplett2009RPHash]. | ||
164 | |||
153 | Bibtex Entries | 165 | Bibtex Entries |
154 | 166 | ||
155 | @article{Kung80 | 167 | @article{Kung80 |
@@ -730,6 +742,11 @@ Revised: | |||
730 | " | 742 | " |
731 | } | 743 | } |
732 | 744 | ||
745 | # | ||
746 | # "What is RCU?" LWN series. | ||
747 | # | ||
748 | ######################################################################## | ||
749 | |||
733 | @article{DinakarGuniguntala2008IBMSysJ | 750 | @article{DinakarGuniguntala2008IBMSysJ |
734 | ,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole" | 751 | ,author="D. Guniguntala and P. E. McKenney and J. Triplett and J. Walpole" |
735 | ,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}" | 752 | ,title="The read-copy-update mechanism for supporting real-time applications on shared-memory multiprocessor systems with {Linux}" |
@@ -820,3 +837,39 @@ Revised: | |||
820 | Uniprocessor assumptions allow simplified RCU implementation. | 837 | Uniprocessor assumptions allow simplified RCU implementation. |
821 | " | 838 | " |
822 | } | 839 | } |
840 | |||
841 | @unpublished{PaulEMcKenney2009expeditedRCU | ||
842 | ,Author="Paul E. McKenney" | ||
843 | ,Title="[{PATCH} -tip 0/3] expedited 'big hammer' {RCU} grace periods" | ||
844 | ,month="June" | ||
845 | ,day="25" | ||
846 | ,year="2009" | ||
847 | ,note="Available: | ||
848 | \url{http://lkml.org/lkml/2009/6/25/306} | ||
849 | [Viewed August 16, 2009]" | ||
850 | ,annotation=" | ||
851 | First posting of expedited RCU to be accepted into -tip. | ||
852 | " | ||
853 | } | ||
854 | |||
855 | @unpublished{JoshTriplett2009RPHash | ||
856 | ,Author="Josh Triplett" | ||
857 | ,Title="Scalable concurrent hash tables via relativistic programming" | ||
858 | ,month="September" | ||
859 | ,year="2009" | ||
860 | ,note="Linux Plumbers Conference presentation" | ||
861 | ,annotation=" | ||
862 | RP fun with hash tables. | ||
863 | " | ||
864 | } | ||
865 | |||
866 | @phdthesis{MathieuDesnoyersPhD | ||
867 | , title = "Low-Impact Operating System Tracing" | ||
868 | , author = "Mathieu Desnoyers" | ||
869 | , school = "Ecole Polytechnique de Montr\'{e}al" | ||
870 | , month = "December" | ||
871 | , year = 2009 | ||
872 | ,note="Available: | ||
873 | \url{http://www.lttng.org/pub/thesis/desnoyers-dissertation-2009-12.pdf} | ||
874 | [Viewed December 9, 2009]" | ||
875 | } | ||
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 51525a30e8b4..790d1a812376 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -8,13 +8,12 @@ would cause. This list is based on experiences reviewing such patches | |||
8 | over a rather long period of time, but improvements are always welcome! | 8 | over a rather long period of time, but improvements are always welcome! |
9 | 9 | ||
10 | 0. Is RCU being applied to a read-mostly situation? If the data | 10 | 0. Is RCU being applied to a read-mostly situation? If the data |
11 | structure is updated more than about 10% of the time, then | 11 | structure is updated more than about 10% of the time, then you |
12 | you should strongly consider some other approach, unless | 12 | should strongly consider some other approach, unless detailed |
13 | detailed performance measurements show that RCU is nonetheless | 13 | performance measurements show that RCU is nonetheless the right |
14 | the right tool for the job. Yes, you might think of RCU | 14 | tool for the job. Yes, RCU does reduce read-side overhead by |
15 | as simply cutting overhead off of the readers and imposing it | 15 | increasing write-side overhead, which is exactly why normal uses |
16 | on the writers. That is exactly why normal uses of RCU will | 16 | of RCU will do much more reading than updating. |
17 | do much more reading than updating. | ||
18 | 17 | ||
19 | Another exception is where performance is not an issue, and RCU | 18 | Another exception is where performance is not an issue, and RCU |
20 | provides a simpler implementation. An example of this situation | 19 | provides a simpler implementation. An example of this situation |
@@ -35,13 +34,13 @@ over a rather long period of time, but improvements are always welcome! | |||
35 | 34 | ||
36 | If you choose #b, be prepared to describe how you have handled | 35 | If you choose #b, be prepared to describe how you have handled |
37 | memory barriers on weakly ordered machines (pretty much all of | 36 | memory barriers on weakly ordered machines (pretty much all of |
38 | them -- even x86 allows reads to be reordered), and be prepared | 37 | them -- even x86 allows later loads to be reordered to precede |
39 | to explain why this added complexity is worthwhile. If you | 38 | earlier stores), and be prepared to explain why this added |
40 | choose #c, be prepared to explain how this single task does not | 39 | complexity is worthwhile. If you choose #c, be prepared to |
41 | become a major bottleneck on big multiprocessor machines (for | 40 | explain how this single task does not become a major bottleneck on |
42 | example, if the task is updating information relating to itself | 41 | big multiprocessor machines (for example, if the task is updating |
43 | that other tasks can read, there by definition can be no | 42 | information relating to itself that other tasks can read, there |
44 | bottleneck). | 43 | by definition can be no bottleneck). |
45 | 44 | ||
46 | 2. Do the RCU read-side critical sections make proper use of | 45 | 2. Do the RCU read-side critical sections make proper use of |
47 | rcu_read_lock() and friends? These primitives are needed | 46 | rcu_read_lock() and friends? These primitives are needed |
@@ -51,8 +50,10 @@ over a rather long period of time, but improvements are always welcome! | |||
51 | actuarial risk of your kernel. | 50 | actuarial risk of your kernel. |
52 | 51 | ||
53 | As a rough rule of thumb, any dereference of an RCU-protected | 52 | As a rough rule of thumb, any dereference of an RCU-protected |
54 | pointer must be covered by rcu_read_lock() or rcu_read_lock_bh() | 53 | pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(), |
55 | or by the appropriate update-side lock. | 54 | rcu_read_lock_sched(), or by the appropriate update-side lock. |
55 | Disabling of preemption can serve as rcu_read_lock_sched(), but | ||
56 | is less readable. | ||
56 | 57 | ||
57 | 3. Does the update code tolerate concurrent accesses? | 58 | 3. Does the update code tolerate concurrent accesses? |
58 | 59 | ||
@@ -62,25 +63,27 @@ over a rather long period of time, but improvements are always welcome! | |||
62 | of ways to handle this concurrency, depending on the situation: | 63 | of ways to handle this concurrency, depending on the situation: |
63 | 64 | ||
64 | a. Use the RCU variants of the list and hlist update | 65 | a. Use the RCU variants of the list and hlist update |
65 | primitives to add, remove, and replace elements on an | 66 | primitives to add, remove, and replace elements on |
66 | RCU-protected list. Alternatively, use the RCU-protected | 67 | an RCU-protected list. Alternatively, use the other |
67 | trees that have been added to the Linux kernel. | 68 | RCU-protected data structures that have been added to |
69 | the Linux kernel. | ||
68 | 70 | ||
69 | This is almost always the best approach. | 71 | This is almost always the best approach. |
70 | 72 | ||
71 | b. Proceed as in (a) above, but also maintain per-element | 73 | b. Proceed as in (a) above, but also maintain per-element |
72 | locks (that are acquired by both readers and writers) | 74 | locks (that are acquired by both readers and writers) |
73 | that guard per-element state. Of course, fields that | 75 | that guard per-element state. Of course, fields that |
74 | the readers refrain from accessing can be guarded by the | 76 | the readers refrain from accessing can be guarded by |
75 | update-side lock. | 77 | some other lock acquired only by updaters, if desired. |
76 | 78 | ||
77 | This works quite well, also. | 79 | This works quite well, also. |
78 | 80 | ||
79 | c. Make updates appear atomic to readers. For example, | 81 | c. Make updates appear atomic to readers. For example, |
80 | pointer updates to properly aligned fields will appear | 82 | pointer updates to properly aligned fields will |
81 | atomic, as will individual atomic primitives. Operations | 83 | appear atomic, as will individual atomic primitives. |
82 | performed under a lock and sequences of multiple atomic | 84 | Sequences of perations performed under a lock will -not- |
83 | primitives will -not- appear to be atomic. | 85 | appear to be atomic to RCU readers, nor will sequences |
86 | of multiple atomic primitives. | ||
84 | 87 | ||
85 | This can work, but is starting to get a bit tricky. | 88 | This can work, but is starting to get a bit tricky. |
86 | 89 | ||
@@ -98,9 +101,9 @@ over a rather long period of time, but improvements are always welcome! | |||
98 | a new structure containing updated values. | 101 | a new structure containing updated values. |
99 | 102 | ||
100 | 4. Weakly ordered CPUs pose special challenges. Almost all CPUs | 103 | 4. Weakly ordered CPUs pose special challenges. Almost all CPUs |
101 | are weakly ordered -- even i386 CPUs allow reads to be reordered. | 104 | are weakly ordered -- even x86 CPUs allow later loads to be |
102 | RCU code must take all of the following measures to prevent | 105 | reordered to precede earlier stores. RCU code must take all of |
103 | memory-corruption problems: | 106 | the following measures to prevent memory-corruption problems: |
104 | 107 | ||
105 | a. Readers must maintain proper ordering of their memory | 108 | a. Readers must maintain proper ordering of their memory |
106 | accesses. The rcu_dereference() primitive ensures that | 109 | accesses. The rcu_dereference() primitive ensures that |
@@ -113,14 +116,25 @@ over a rather long period of time, but improvements are always welcome! | |||
113 | The rcu_dereference() primitive is also an excellent | 116 | The rcu_dereference() primitive is also an excellent |
114 | documentation aid, letting the person reading the code | 117 | documentation aid, letting the person reading the code |
115 | know exactly which pointers are protected by RCU. | 118 | know exactly which pointers are protected by RCU. |
116 | 119 | Please note that compilers can also reorder code, and | |
117 | The rcu_dereference() primitive is used by the various | 120 | they are becoming increasingly aggressive about doing |
118 | "_rcu()" list-traversal primitives, such as the | 121 | just that. The rcu_dereference() primitive therefore |
119 | list_for_each_entry_rcu(). Note that it is perfectly | 122 | also prevents destructive compiler optimizations. |
120 | legal (if redundant) for update-side code to use | 123 | |
121 | rcu_dereference() and the "_rcu()" list-traversal | 124 | The rcu_dereference() primitive is used by the |
122 | primitives. This is particularly useful in code | 125 | various "_rcu()" list-traversal primitives, such |
123 | that is common to readers and updaters. | 126 | as the list_for_each_entry_rcu(). Note that it is |
127 | perfectly legal (if redundant) for update-side code to | ||
128 | use rcu_dereference() and the "_rcu()" list-traversal | ||
129 | primitives. This is particularly useful in code that | ||
130 | is common to readers and updaters. However, lockdep | ||
131 | will complain if you access rcu_dereference() outside | ||
132 | of an RCU read-side critical section. See lockdep.txt | ||
133 | to learn what to do about this. | ||
134 | |||
135 | Of course, neither rcu_dereference() nor the "_rcu()" | ||
136 | list-traversal primitives can substitute for a good | ||
137 | concurrency design coordinating among multiple updaters. | ||
124 | 138 | ||
125 | b. If the list macros are being used, the list_add_tail_rcu() | 139 | b. If the list macros are being used, the list_add_tail_rcu() |
126 | and list_add_rcu() primitives must be used in order | 140 | and list_add_rcu() primitives must be used in order |
@@ -135,11 +149,14 @@ over a rather long period of time, but improvements are always welcome! | |||
135 | readers. Similarly, if the hlist macros are being used, | 149 | readers. Similarly, if the hlist macros are being used, |
136 | the hlist_del_rcu() primitive is required. | 150 | the hlist_del_rcu() primitive is required. |
137 | 151 | ||
138 | The list_replace_rcu() primitive may be used to | 152 | The list_replace_rcu() and hlist_replace_rcu() primitives |
139 | replace an old structure with a new one in an | 153 | may be used to replace an old structure with a new one |
140 | RCU-protected list. | 154 | in their respective types of RCU-protected lists. |
155 | |||
156 | d. Rules similar to (4b) and (4c) apply to the "hlist_nulls" | ||
157 | type of RCU-protected linked lists. | ||
141 | 158 | ||
142 | d. Updates must ensure that initialization of a given | 159 | e. Updates must ensure that initialization of a given |
143 | structure happens before pointers to that structure are | 160 | structure happens before pointers to that structure are |
144 | publicized. Use the rcu_assign_pointer() primitive | 161 | publicized. Use the rcu_assign_pointer() primitive |
145 | when publicizing a pointer to a structure that can | 162 | when publicizing a pointer to a structure that can |
@@ -151,16 +168,31 @@ over a rather long period of time, but improvements are always welcome! | |||
151 | it cannot block. | 168 | it cannot block. |
152 | 169 | ||
153 | 6. Since synchronize_rcu() can block, it cannot be called from | 170 | 6. Since synchronize_rcu() can block, it cannot be called from |
154 | any sort of irq context. Ditto for synchronize_sched() and | 171 | any sort of irq context. The same rule applies for |
155 | synchronize_srcu(). | 172 | synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(), |
156 | 173 | synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(), | |
157 | 7. If the updater uses call_rcu(), then the corresponding readers | 174 | synchronize_sched_expedite(), and synchronize_srcu_expedited(). |
158 | must use rcu_read_lock() and rcu_read_unlock(). If the updater | 175 | |
159 | uses call_rcu_bh(), then the corresponding readers must use | 176 | The expedited forms of these primitives have the same semantics |
160 | rcu_read_lock_bh() and rcu_read_unlock_bh(). If the updater | 177 | as the non-expedited forms, but expediting is both expensive |
161 | uses call_rcu_sched(), then the corresponding readers must | 178 | and unfriendly to real-time workloads. Use of the expedited |
162 | disable preemption. Mixing things up will result in confusion | 179 | primitives should be restricted to rare configuration-change |
163 | and broken kernels. | 180 | operations that would not normally be undertaken while a real-time |
181 | workload is running. | ||
182 | |||
183 | 7. If the updater uses call_rcu() or synchronize_rcu(), then the | ||
184 | corresponding readers must use rcu_read_lock() and | ||
185 | rcu_read_unlock(). If the updater uses call_rcu_bh() or | ||
186 | synchronize_rcu_bh(), then the corresponding readers must | ||
187 | use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the | ||
188 | updater uses call_rcu_sched() or synchronize_sched(), then | ||
189 | the corresponding readers must disable preemption, possibly | ||
190 | by calling rcu_read_lock_sched() and rcu_read_unlock_sched(). | ||
191 | If the updater uses synchronize_srcu(), the the corresponding | ||
192 | readers must use srcu_read_lock() and srcu_read_unlock(), | ||
193 | and with the same srcu_struct. The rules for the expedited | ||
194 | primitives are the same as for their non-expedited counterparts. | ||
195 | Mixing things up will result in confusion and broken kernels. | ||
164 | 196 | ||
165 | One exception to this rule: rcu_read_lock() and rcu_read_unlock() | 197 | One exception to this rule: rcu_read_lock() and rcu_read_unlock() |
166 | may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() | 198 | may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() |
@@ -212,6 +244,8 @@ over a rather long period of time, but improvements are always welcome! | |||
212 | e. Periodically invoke synchronize_rcu(), permitting a limited | 244 | e. Periodically invoke synchronize_rcu(), permitting a limited |
213 | number of updates per grace period. | 245 | number of updates per grace period. |
214 | 246 | ||
247 | The same cautions apply to call_rcu_bh() and call_rcu_sched(). | ||
248 | |||
215 | 9. All RCU list-traversal primitives, which include | 249 | 9. All RCU list-traversal primitives, which include |
216 | rcu_dereference(), list_for_each_entry_rcu(), | 250 | rcu_dereference(), list_for_each_entry_rcu(), |
217 | list_for_each_continue_rcu(), and list_for_each_safe_rcu(), | 251 | list_for_each_continue_rcu(), and list_for_each_safe_rcu(), |
@@ -219,17 +253,21 @@ over a rather long period of time, but improvements are always welcome! | |||
219 | must be protected by appropriate update-side locks. RCU | 253 | must be protected by appropriate update-side locks. RCU |
220 | read-side critical sections are delimited by rcu_read_lock() | 254 | read-side critical sections are delimited by rcu_read_lock() |
221 | and rcu_read_unlock(), or by similar primitives such as | 255 | and rcu_read_unlock(), or by similar primitives such as |
222 | rcu_read_lock_bh() and rcu_read_unlock_bh(). | 256 | rcu_read_lock_bh() and rcu_read_unlock_bh(), in which case |
257 | the matching rcu_dereference() primitive must be used in order | ||
258 | to keep lockdep happy, in this case, rcu_dereference_bh(). | ||
223 | 259 | ||
224 | The reason that it is permissible to use RCU list-traversal | 260 | The reason that it is permissible to use RCU list-traversal |
225 | primitives when the update-side lock is held is that doing so | 261 | primitives when the update-side lock is held is that doing so |
226 | can be quite helpful in reducing code bloat when common code is | 262 | can be quite helpful in reducing code bloat when common code is |
227 | shared between readers and updaters. | 263 | shared between readers and updaters. Additional primitives |
264 | are provided for this case, as discussed in lockdep.txt. | ||
228 | 265 | ||
229 | 10. Conversely, if you are in an RCU read-side critical section, | 266 | 10. Conversely, if you are in an RCU read-side critical section, |
230 | and you don't hold the appropriate update-side lock, you -must- | 267 | and you don't hold the appropriate update-side lock, you -must- |
231 | use the "_rcu()" variants of the list macros. Failing to do so | 268 | use the "_rcu()" variants of the list macros. Failing to do so |
232 | will break Alpha and confuse people reading your code. | 269 | will break Alpha, cause aggressive compilers to generate bad code, |
270 | and confuse people trying to read your code. | ||
233 | 271 | ||
234 | 11. Note that synchronize_rcu() -only- guarantees to wait until | 272 | 11. Note that synchronize_rcu() -only- guarantees to wait until |
235 | all currently executing rcu_read_lock()-protected RCU read-side | 273 | all currently executing rcu_read_lock()-protected RCU read-side |
@@ -239,15 +277,21 @@ over a rather long period of time, but improvements are always welcome! | |||
239 | rcu_read_lock()-protected read-side critical sections, do -not- | 277 | rcu_read_lock()-protected read-side critical sections, do -not- |
240 | use synchronize_rcu(). | 278 | use synchronize_rcu(). |
241 | 279 | ||
242 | If you want to wait for some of these other things, you might | 280 | Similarly, disabling preemption is not an acceptable substitute |
243 | instead need to use synchronize_irq() or synchronize_sched(). | 281 | for rcu_read_lock(). Code that attempts to use preemption |
282 | disabling where it should be using rcu_read_lock() will break | ||
283 | in real-time kernel builds. | ||
284 | |||
285 | If you want to wait for interrupt handlers, NMI handlers, and | ||
286 | code under the influence of preempt_disable(), you instead | ||
287 | need to use synchronize_irq() or synchronize_sched(). | ||
244 | 288 | ||
245 | 12. Any lock acquired by an RCU callback must be acquired elsewhere | 289 | 12. Any lock acquired by an RCU callback must be acquired elsewhere |
246 | with softirq disabled, e.g., via spin_lock_irqsave(), | 290 | with softirq disabled, e.g., via spin_lock_irqsave(), |
247 | spin_lock_bh(), etc. Failing to disable irq on a given | 291 | spin_lock_bh(), etc. Failing to disable irq on a given |
248 | acquisition of that lock will result in deadlock as soon as the | 292 | acquisition of that lock will result in deadlock as soon as |
249 | RCU callback happens to interrupt that acquisition's critical | 293 | the RCU softirq handler happens to run your RCU callback while |
250 | section. | 294 | interrupting that acquisition's critical section. |
251 | 295 | ||
252 | 13. RCU callbacks can be and are executed in parallel. In many cases, | 296 | 13. RCU callbacks can be and are executed in parallel. In many cases, |
253 | the callback code simply wrappers around kfree(), so that this | 297 | the callback code simply wrappers around kfree(), so that this |
@@ -265,29 +309,30 @@ over a rather long period of time, but improvements are always welcome! | |||
265 | not the case, a self-spawning RCU callback would prevent the | 309 | not the case, a self-spawning RCU callback would prevent the |
266 | victim CPU from ever going offline.) | 310 | victim CPU from ever going offline.) |
267 | 311 | ||
268 | 14. SRCU (srcu_read_lock(), srcu_read_unlock(), and synchronize_srcu()) | 312 | 14. SRCU (srcu_read_lock(), srcu_read_unlock(), srcu_dereference(), |
269 | may only be invoked from process context. Unlike other forms of | 313 | synchronize_srcu(), and synchronize_srcu_expedited()) may only |
270 | RCU, it -is- permissible to block in an SRCU read-side critical | 314 | be invoked from process context. Unlike other forms of RCU, it |
271 | section (demarked by srcu_read_lock() and srcu_read_unlock()), | 315 | -is- permissible to block in an SRCU read-side critical section |
272 | hence the "SRCU": "sleepable RCU". Please note that if you | 316 | (demarked by srcu_read_lock() and srcu_read_unlock()), hence the |
273 | don't need to sleep in read-side critical sections, you should | 317 | "SRCU": "sleepable RCU". Please note that if you don't need |
274 | be using RCU rather than SRCU, because RCU is almost always | 318 | to sleep in read-side critical sections, you should be using |
275 | faster and easier to use than is SRCU. | 319 | RCU rather than SRCU, because RCU is almost always faster and |
320 | easier to use than is SRCU. | ||
276 | 321 | ||
277 | Also unlike other forms of RCU, explicit initialization | 322 | Also unlike other forms of RCU, explicit initialization |
278 | and cleanup is required via init_srcu_struct() and | 323 | and cleanup is required via init_srcu_struct() and |
279 | cleanup_srcu_struct(). These are passed a "struct srcu_struct" | 324 | cleanup_srcu_struct(). These are passed a "struct srcu_struct" |
280 | that defines the scope of a given SRCU domain. Once initialized, | 325 | that defines the scope of a given SRCU domain. Once initialized, |
281 | the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() | 326 | the srcu_struct is passed to srcu_read_lock(), srcu_read_unlock() |
282 | and synchronize_srcu(). A given synchronize_srcu() waits only | 327 | synchronize_srcu(), and synchronize_srcu_expedited(). A given |
283 | for SRCU read-side critical sections governed by srcu_read_lock() | 328 | synchronize_srcu() waits only for SRCU read-side critical |
284 | and srcu_read_unlock() calls that have been passd the same | 329 | sections governed by srcu_read_lock() and srcu_read_unlock() |
285 | srcu_struct. This property is what makes sleeping read-side | 330 | calls that have been passed the same srcu_struct. This property |
286 | critical sections tolerable -- a given subsystem delays only | 331 | is what makes sleeping read-side critical sections tolerable -- |
287 | its own updates, not those of other subsystems using SRCU. | 332 | a given subsystem delays only its own updates, not those of other |
288 | Therefore, SRCU is less prone to OOM the system than RCU would | 333 | subsystems using SRCU. Therefore, SRCU is less prone to OOM the |
289 | be if RCU's read-side critical sections were permitted to | 334 | system than RCU would be if RCU's read-side critical sections |
290 | sleep. | 335 | were permitted to sleep. |
291 | 336 | ||
292 | The ability to sleep in read-side critical sections does not | 337 | The ability to sleep in read-side critical sections does not |
293 | come for free. First, corresponding srcu_read_lock() and | 338 | come for free. First, corresponding srcu_read_lock() and |
@@ -300,8 +345,8 @@ over a rather long period of time, but improvements are always welcome! | |||
300 | requiring SRCU's read-side deadlock immunity or low read-side | 345 | requiring SRCU's read-side deadlock immunity or low read-side |
301 | realtime latency. | 346 | realtime latency. |
302 | 347 | ||
303 | Note that, rcu_assign_pointer() and rcu_dereference() relate to | 348 | Note that, rcu_assign_pointer() relates to SRCU just as they do |
304 | SRCU just as they do to other forms of RCU. | 349 | to other forms of RCU. |
305 | 350 | ||
306 | 15. The whole point of call_rcu(), synchronize_rcu(), and friends | 351 | 15. The whole point of call_rcu(), synchronize_rcu(), and friends |
307 | is to wait until all pre-existing readers have finished before | 352 | is to wait until all pre-existing readers have finished before |
@@ -311,12 +356,12 @@ over a rather long period of time, but improvements are always welcome! | |||
311 | destructive operation, and -only- -then- invoke call_rcu(), | 356 | destructive operation, and -only- -then- invoke call_rcu(), |
312 | synchronize_rcu(), or friends. | 357 | synchronize_rcu(), or friends. |
313 | 358 | ||
314 | Because these primitives only wait for pre-existing readers, | 359 | Because these primitives only wait for pre-existing readers, it |
315 | it is the caller's responsibility to guarantee safety to | 360 | is the caller's responsibility to guarantee that any subsequent |
316 | any subsequent readers. | 361 | readers will execute safely. |
317 | 362 | ||
318 | 16. The various RCU read-side primitives do -not- contain memory | 363 | 16. The various RCU read-side primitives do -not- necessarily contain |
319 | barriers. The CPU (and in some cases, the compiler) is free | 364 | memory barriers. You should therefore plan for the CPU |
320 | to reorder code into and out of RCU read-side critical sections. | 365 | and the compiler to freely reorder code into and out of RCU |
321 | It is the responsibility of the RCU update-side primitives to | 366 | read-side critical sections. It is the responsibility of the |
322 | deal with this. | 367 | RCU update-side primitives to deal with this. |
diff --git a/Documentation/RCU/lockdep.txt b/Documentation/RCU/lockdep.txt new file mode 100644 index 000000000000..d7a49b2f6994 --- /dev/null +++ b/Documentation/RCU/lockdep.txt | |||
@@ -0,0 +1,91 @@ | |||
1 | RCU and lockdep checking | ||
2 | |||
3 | All flavors of RCU have lockdep checking available, so that lockdep is | ||
4 | aware of when each task enters and leaves any flavor of RCU read-side | ||
5 | critical section. Each flavor of RCU is tracked separately (but note | ||
6 | that this is not the case in 2.6.32 and earlier). This allows lockdep's | ||
7 | tracking to include RCU state, which can sometimes help when debugging | ||
8 | deadlocks and the like. | ||
9 | |||
10 | In addition, RCU provides the following primitives that check lockdep's | ||
11 | state: | ||
12 | |||
13 | rcu_read_lock_held() for normal RCU. | ||
14 | rcu_read_lock_bh_held() for RCU-bh. | ||
15 | rcu_read_lock_sched_held() for RCU-sched. | ||
16 | srcu_read_lock_held() for SRCU. | ||
17 | |||
18 | These functions are conservative, and will therefore return 1 if they | ||
19 | aren't certain (for example, if CONFIG_DEBUG_LOCK_ALLOC is not set). | ||
20 | This prevents things like WARN_ON(!rcu_read_lock_held()) from giving false | ||
21 | positives when lockdep is disabled. | ||
22 | |||
23 | In addition, a separate kernel config parameter CONFIG_PROVE_RCU enables | ||
24 | checking of rcu_dereference() primitives: | ||
25 | |||
26 | rcu_dereference(p): | ||
27 | Check for RCU read-side critical section. | ||
28 | rcu_dereference_bh(p): | ||
29 | Check for RCU-bh read-side critical section. | ||
30 | rcu_dereference_sched(p): | ||
31 | Check for RCU-sched read-side critical section. | ||
32 | srcu_dereference(p, sp): | ||
33 | Check for SRCU read-side critical section. | ||
34 | rcu_dereference_check(p, c): | ||
35 | Use explicit check expression "c". This is useful in | ||
36 | code that is invoked by both readers and updaters. | ||
37 | rcu_dereference_raw(p) | ||
38 | Don't check. (Use sparingly, if at all.) | ||
39 | rcu_dereference_protected(p, c): | ||
40 | Use explicit check expression "c", and omit all barriers | ||
41 | and compiler constraints. This is useful when the data | ||
42 | structure cannot change, for example, in code that is | ||
43 | invoked only by updaters. | ||
44 | rcu_access_pointer(p): | ||
45 | Return the value of the pointer and omit all barriers, | ||
46 | but retain the compiler constraints that prevent duplicating | ||
47 | or coalescsing. This is useful when when testing the | ||
48 | value of the pointer itself, for example, against NULL. | ||
49 | |||
50 | The rcu_dereference_check() check expression can be any boolean | ||
51 | expression, but would normally include one of the rcu_read_lock_held() | ||
52 | family of functions and a lockdep expression. However, any boolean | ||
53 | expression can be used. For a moderately ornate example, consider | ||
54 | the following: | ||
55 | |||
56 | file = rcu_dereference_check(fdt->fd[fd], | ||
57 | rcu_read_lock_held() || | ||
58 | lockdep_is_held(&files->file_lock) || | ||
59 | atomic_read(&files->count) == 1); | ||
60 | |||
61 | This expression picks up the pointer "fdt->fd[fd]" in an RCU-safe manner, | ||
62 | and, if CONFIG_PROVE_RCU is configured, verifies that this expression | ||
63 | is used in: | ||
64 | |||
65 | 1. An RCU read-side critical section, or | ||
66 | 2. with files->file_lock held, or | ||
67 | 3. on an unshared files_struct. | ||
68 | |||
69 | In case (1), the pointer is picked up in an RCU-safe manner for vanilla | ||
70 | RCU read-side critical sections, in case (2) the ->file_lock prevents | ||
71 | any change from taking place, and finally, in case (3) the current task | ||
72 | is the only task accessing the file_struct, again preventing any change | ||
73 | from taking place. If the above statement was invoked only from updater | ||
74 | code, it could instead be written as follows: | ||
75 | |||
76 | file = rcu_dereference_protected(fdt->fd[fd], | ||
77 | lockdep_is_held(&files->file_lock) || | ||
78 | atomic_read(&files->count) == 1); | ||
79 | |||
80 | This would verify cases #2 and #3 above, and furthermore lockdep would | ||
81 | complain if this was used in an RCU read-side critical section unless one | ||
82 | of these two cases held. Because rcu_dereference_protected() omits all | ||
83 | barriers and compiler constraints, it generates better code than do the | ||
84 | other flavors of rcu_dereference(). On the other hand, it is illegal | ||
85 | to use rcu_dereference_protected() if either the RCU-protected pointer | ||
86 | or the RCU-protected data that it points to can change concurrently. | ||
87 | |||
88 | There are currently only "universal" versions of the rcu_assign_pointer() | ||
89 | and RCU list-/tree-traversal primitives, which do not (yet) check for | ||
90 | being in an RCU read-side critical section. In the future, separate | ||
91 | versions of these primitives might be created. | ||
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 2a23523ce471..31852705b586 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt | |||
@@ -75,6 +75,8 @@ o I hear that RCU is patented? What is with that? | |||
75 | search for the string "Patent" in RTFP.txt to find them. | 75 | search for the string "Patent" in RTFP.txt to find them. |
76 | Of these, one was allowed to lapse by the assignee, and the | 76 | Of these, one was allowed to lapse by the assignee, and the |
77 | others have been contributed to the Linux kernel under GPL. | 77 | others have been contributed to the Linux kernel under GPL. |
78 | There are now also LGPL implementations of user-level RCU | ||
79 | available (http://lttng.org/?q=node/18). | ||
78 | 80 | ||
79 | o I hear that RCU needs work in order to support realtime kernels? | 81 | o I hear that RCU needs work in order to support realtime kernels? |
80 | 82 | ||
@@ -91,48 +93,4 @@ o Where can I find more information on RCU? | |||
91 | 93 | ||
92 | o What are all these files in this directory? | 94 | o What are all these files in this directory? |
93 | 95 | ||
94 | 96 | See 00-INDEX for the list. | |
95 | NMI-RCU.txt | ||
96 | |||
97 | Describes how to use RCU to implement dynamic | ||
98 | NMI handlers, which can be revectored on the fly, | ||
99 | without rebooting. | ||
100 | |||
101 | RTFP.txt | ||
102 | |||
103 | List of RCU-related publications and web sites. | ||
104 | |||
105 | UP.txt | ||
106 | |||
107 | Discussion of RCU usage in UP kernels. | ||
108 | |||
109 | arrayRCU.txt | ||
110 | |||
111 | Describes how to use RCU to protect arrays, with | ||
112 | resizeable arrays whose elements reference other | ||
113 | data structures being of the most interest. | ||
114 | |||
115 | checklist.txt | ||
116 | |||
117 | Lists things to check for when inspecting code that | ||
118 | uses RCU. | ||
119 | |||
120 | listRCU.txt | ||
121 | |||
122 | Describes how to use RCU to protect linked lists. | ||
123 | This is the simplest and most common use of RCU | ||
124 | in the Linux kernel. | ||
125 | |||
126 | rcu.txt | ||
127 | |||
128 | You are reading it! | ||
129 | |||
130 | rcuref.txt | ||
131 | |||
132 | Describes how to combine use of reference counts | ||
133 | with RCU. | ||
134 | |||
135 | whatisRCU.txt | ||
136 | |||
137 | Overview of how the RCU implementation works. Along | ||
138 | the way, presents a conceptual view of RCU. | ||
diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt new file mode 100644 index 000000000000..1423d2570d78 --- /dev/null +++ b/Documentation/RCU/stallwarn.txt | |||
@@ -0,0 +1,58 @@ | |||
1 | Using RCU's CPU Stall Detector | ||
2 | |||
3 | The CONFIG_RCU_CPU_STALL_DETECTOR kernel config parameter enables | ||
4 | RCU's CPU stall detector, which detects conditions that unduly delay | ||
5 | RCU grace periods. The stall detector's idea of what constitutes | ||
6 | "unduly delayed" is controlled by a pair of C preprocessor macros: | ||
7 | |||
8 | RCU_SECONDS_TILL_STALL_CHECK | ||
9 | |||
10 | This macro defines the period of time that RCU will wait from | ||
11 | the beginning of a grace period until it issues an RCU CPU | ||
12 | stall warning. It is normally ten seconds. | ||
13 | |||
14 | RCU_SECONDS_TILL_STALL_RECHECK | ||
15 | |||
16 | This macro defines the period of time that RCU will wait after | ||
17 | issuing a stall warning until it issues another stall warning. | ||
18 | It is normally set to thirty seconds. | ||
19 | |||
20 | RCU_STALL_RAT_DELAY | ||
21 | |||
22 | The CPU stall detector tries to make the offending CPU rat on itself, | ||
23 | as this often gives better-quality stack traces. However, if | ||
24 | the offending CPU does not detect its own stall in the number | ||
25 | of jiffies specified by RCU_STALL_RAT_DELAY, then other CPUs will | ||
26 | complain. This is normally set to two jiffies. | ||
27 | |||
28 | The following problems can result in an RCU CPU stall warning: | ||
29 | |||
30 | o A CPU looping in an RCU read-side critical section. | ||
31 | |||
32 | o A CPU looping with interrupts disabled. | ||
33 | |||
34 | o A CPU looping with preemption disabled. | ||
35 | |||
36 | o For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the kernel | ||
37 | without invoking schedule(). | ||
38 | |||
39 | o A bug in the RCU implementation. | ||
40 | |||
41 | o A hardware failure. This is quite unlikely, but has occurred | ||
42 | at least once in a former life. A CPU failed in a running system, | ||
43 | becoming unresponsive, but not causing an immediate crash. | ||
44 | This resulted in a series of RCU CPU stall warnings, eventually | ||
45 | leading the realization that the CPU had failed. | ||
46 | |||
47 | The RCU, RCU-sched, and RCU-bh implementations have CPU stall warning. | ||
48 | SRCU does not do so directly, but its calls to synchronize_sched() will | ||
49 | result in RCU-sched detecting any CPU stalls that might be occurring. | ||
50 | |||
51 | To diagnose the cause of the stall, inspect the stack traces. The offending | ||
52 | function will usually be near the top of the stack. If you have a series | ||
53 | of stall warnings from a single extended stall, comparing the stack traces | ||
54 | can often help determine where the stall is occurring, which will usually | ||
55 | be in the function nearest the top of the stack that stays the same from | ||
56 | trace to trace. | ||
57 | |||
58 | RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE. | ||
diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt index 9dba3bb90e60..0e50bc2aa1e2 100644 --- a/Documentation/RCU/torture.txt +++ b/Documentation/RCU/torture.txt | |||
@@ -30,6 +30,18 @@ MODULE PARAMETERS | |||
30 | 30 | ||
31 | This module has the following parameters: | 31 | This module has the following parameters: |
32 | 32 | ||
33 | fqs_duration Duration (in microseconds) of artificially induced bursts | ||
34 | of force_quiescent_state() invocations. In RCU | ||
35 | implementations having force_quiescent_state(), these | ||
36 | bursts help force races between forcing a given grace | ||
37 | period and that grace period ending on its own. | ||
38 | |||
39 | fqs_holdoff Holdoff time (in microseconds) between consecutive calls | ||
40 | to force_quiescent_state() within a burst. | ||
41 | |||
42 | fqs_stutter Wait time (in seconds) between consecutive bursts | ||
43 | of calls to force_quiescent_state(). | ||
44 | |||
33 | irqreaders Says to invoke RCU readers from irq level. This is currently | 45 | irqreaders Says to invoke RCU readers from irq level. This is currently |
34 | done via timers. Defaults to "1" for variants of RCU that | 46 | done via timers. Defaults to "1" for variants of RCU that |
35 | permit this. (Or, more accurately, variants of RCU that do | 47 | permit this. (Or, more accurately, variants of RCU that do |
diff --git a/Documentation/RCU/trace.txt b/Documentation/RCU/trace.txt index 187bbf10c923..8608fd85e921 100644 --- a/Documentation/RCU/trace.txt +++ b/Documentation/RCU/trace.txt | |||
@@ -1,185 +1,10 @@ | |||
1 | CONFIG_RCU_TRACE debugfs Files and Formats | 1 | CONFIG_RCU_TRACE debugfs Files and Formats |
2 | 2 | ||
3 | 3 | ||
4 | The rcupreempt and rcutree implementations of RCU provide debugfs trace | 4 | The rcutree implementation of RCU provides debugfs trace output that |
5 | output that summarizes counters and state. This information is useful for | 5 | summarizes counters and state. This information is useful for debugging |
6 | debugging RCU itself, and can sometimes also help to debug abuses of RCU. | 6 | RCU itself, and can sometimes also help to debug abuses of RCU. |
7 | Note that the rcuclassic implementation of RCU does not provide debugfs | 7 | The following sections describe the debugfs files and formats. |
8 | trace output. | ||
9 | |||
10 | The following sections describe the debugfs files and formats for | ||
11 | preemptable RCU (rcupreempt) and hierarchical RCU (rcutree). | ||
12 | |||
13 | |||
14 | Preemptable RCU debugfs Files and Formats | ||
15 | |||
16 | This implementation of RCU provides three debugfs files under the | ||
17 | top-level directory RCU: rcu/rcuctrs (which displays the per-CPU | ||
18 | counters used by preemptable RCU) rcu/rcugp (which displays grace-period | ||
19 | counters), and rcu/rcustats (which internal counters for debugging RCU). | ||
20 | |||
21 | The output of "cat rcu/rcuctrs" looks as follows: | ||
22 | |||
23 | CPU last cur F M | ||
24 | 0 5 -5 0 0 | ||
25 | 1 -1 0 0 0 | ||
26 | 2 0 1 0 0 | ||
27 | 3 0 1 0 0 | ||
28 | 4 0 1 0 0 | ||
29 | 5 0 1 0 0 | ||
30 | 6 0 2 0 0 | ||
31 | 7 0 -1 0 0 | ||
32 | 8 0 1 0 0 | ||
33 | ggp = 26226, state = waitzero | ||
34 | |||
35 | The per-CPU fields are as follows: | ||
36 | |||
37 | o "CPU" gives the CPU number. Offline CPUs are not displayed. | ||
38 | |||
39 | o "last" gives the value of the counter that is being decremented | ||
40 | for the current grace period phase. In the example above, | ||
41 | the counters sum to 4, indicating that there are still four | ||
42 | RCU read-side critical sections still running that started | ||
43 | before the last counter flip. | ||
44 | |||
45 | o "cur" gives the value of the counter that is currently being | ||
46 | both incremented (by rcu_read_lock()) and decremented (by | ||
47 | rcu_read_unlock()). In the example above, the counters sum to | ||
48 | 1, indicating that there is only one RCU read-side critical section | ||
49 | still running that started after the last counter flip. | ||
50 | |||
51 | o "F" indicates whether RCU is waiting for this CPU to acknowledge | ||
52 | a counter flip. In the above example, RCU is not waiting on any, | ||
53 | which is consistent with the state being "waitzero" rather than | ||
54 | "waitack". | ||
55 | |||
56 | o "M" indicates whether RCU is waiting for this CPU to execute a | ||
57 | memory barrier. In the above example, RCU is not waiting on any, | ||
58 | which is consistent with the state being "waitzero" rather than | ||
59 | "waitmb". | ||
60 | |||
61 | o "ggp" is the global grace-period counter. | ||
62 | |||
63 | o "state" is the RCU state, which can be one of the following: | ||
64 | |||
65 | o "idle": there is no grace period in progress. | ||
66 | |||
67 | o "waitack": RCU just incremented the global grace-period | ||
68 | counter, which has the effect of reversing the roles of | ||
69 | the "last" and "cur" counters above, and is waiting for | ||
70 | all the CPUs to acknowledge the flip. Once the flip has | ||
71 | been acknowledged, CPUs will no longer be incrementing | ||
72 | what are now the "last" counters, so that their sum will | ||
73 | decrease monotonically down to zero. | ||
74 | |||
75 | o "waitzero": RCU is waiting for the sum of the "last" counters | ||
76 | to decrease to zero. | ||
77 | |||
78 | o "waitmb": RCU is waiting for each CPU to execute a memory | ||
79 | barrier, which ensures that instructions from a given CPU's | ||
80 | last RCU read-side critical section cannot be reordered | ||
81 | with instructions following the memory-barrier instruction. | ||
82 | |||
83 | The output of "cat rcu/rcugp" looks as follows: | ||
84 | |||
85 | oldggp=48870 newggp=48873 | ||
86 | |||
87 | Note that reading from this file provokes a synchronize_rcu(). The | ||
88 | "oldggp" value is that of "ggp" from rcu/rcuctrs above, taken before | ||
89 | executing the synchronize_rcu(), and the "newggp" value is also the | ||
90 | "ggp" value, but taken after the synchronize_rcu() command returns. | ||
91 | |||
92 | |||
93 | The output of "cat rcu/rcugp" looks as follows: | ||
94 | |||
95 | na=1337955 nl=40 wa=1337915 wl=44 da=1337871 dl=0 dr=1337871 di=1337871 | ||
96 | 1=50989 e1=6138 i1=49722 ie1=82 g1=49640 a1=315203 ae1=265563 a2=49640 | ||
97 | z1=1401244 ze1=1351605 z2=49639 m1=5661253 me1=5611614 m2=49639 | ||
98 | |||
99 | These are counters tracking internal preemptable-RCU events, however, | ||
100 | some of them may be useful for debugging algorithms using RCU. In | ||
101 | particular, the "nl", "wl", and "dl" values track the number of RCU | ||
102 | callbacks in various states. The fields are as follows: | ||
103 | |||
104 | o "na" is the total number of RCU callbacks that have been enqueued | ||
105 | since boot. | ||
106 | |||
107 | o "nl" is the number of RCU callbacks waiting for the previous | ||
108 | grace period to end so that they can start waiting on the next | ||
109 | grace period. | ||
110 | |||
111 | o "wa" is the total number of RCU callbacks that have started waiting | ||
112 | for a grace period since boot. "na" should be roughly equal to | ||
113 | "nl" plus "wa". | ||
114 | |||
115 | o "wl" is the number of RCU callbacks currently waiting for their | ||
116 | grace period to end. | ||
117 | |||
118 | o "da" is the total number of RCU callbacks whose grace periods | ||
119 | have completed since boot. "wa" should be roughly equal to | ||
120 | "wl" plus "da". | ||
121 | |||
122 | o "dr" is the total number of RCU callbacks that have been removed | ||
123 | from the list of callbacks ready to invoke. "dr" should be roughly | ||
124 | equal to "da". | ||
125 | |||
126 | o "di" is the total number of RCU callbacks that have been invoked | ||
127 | since boot. "di" should be roughly equal to "da", though some | ||
128 | early versions of preemptable RCU had a bug so that only the | ||
129 | last CPU's count of invocations was displayed, rather than the | ||
130 | sum of all CPU's counts. | ||
131 | |||
132 | o "1" is the number of calls to rcu_try_flip(). This should be | ||
133 | roughly equal to the sum of "e1", "i1", "a1", "z1", and "m1" | ||
134 | described below. In other words, the number of times that | ||
135 | the state machine is visited should be equal to the sum of the | ||
136 | number of times that each state is visited plus the number of | ||
137 | times that the state-machine lock acquisition failed. | ||
138 | |||
139 | o "e1" is the number of times that rcu_try_flip() was unable to | ||
140 | acquire the fliplock. | ||
141 | |||
142 | o "i1" is the number of calls to rcu_try_flip_idle(). | ||
143 | |||
144 | o "ie1" is the number of times rcu_try_flip_idle() exited early | ||
145 | due to the calling CPU having no work for RCU. | ||
146 | |||
147 | o "g1" is the number of times that rcu_try_flip_idle() decided | ||
148 | to start a new grace period. "i1" should be roughly equal to | ||
149 | "ie1" plus "g1". | ||
150 | |||
151 | o "a1" is the number of calls to rcu_try_flip_waitack(). | ||
152 | |||
153 | o "ae1" is the number of times that rcu_try_flip_waitack() found | ||
154 | that at least one CPU had not yet acknowledge the new grace period | ||
155 | (AKA "counter flip"). | ||
156 | |||
157 | o "a2" is the number of time rcu_try_flip_waitack() found that | ||
158 | all CPUs had acknowledged. "a1" should be roughly equal to | ||
159 | "ae1" plus "a2". (This particular output was collected on | ||
160 | a 128-CPU machine, hence the smaller-than-usual fraction of | ||
161 | calls to rcu_try_flip_waitack() finding all CPUs having already | ||
162 | acknowledged.) | ||
163 | |||
164 | o "z1" is the number of calls to rcu_try_flip_waitzero(). | ||
165 | |||
166 | o "ze1" is the number of times that rcu_try_flip_waitzero() found | ||
167 | that not all of the old RCU read-side critical sections had | ||
168 | completed. | ||
169 | |||
170 | o "z2" is the number of times that rcu_try_flip_waitzero() finds | ||
171 | the sum of the counters equal to zero, in other words, that | ||
172 | all of the old RCU read-side critical sections had completed. | ||
173 | The value of "z1" should be roughly equal to "ze1" plus | ||
174 | "z2". | ||
175 | |||
176 | o "m1" is the number of calls to rcu_try_flip_waitmb(). | ||
177 | |||
178 | o "me1" is the number of times that rcu_try_flip_waitmb() finds | ||
179 | that at least one CPU has not yet executed a memory barrier. | ||
180 | |||
181 | o "m2" is the number of times that rcu_try_flip_waitmb() finds that | ||
182 | all CPUs have executed a memory barrier. | ||
183 | 8 | ||
184 | 9 | ||
185 | Hierarchical RCU debugfs Files and Formats | 10 | Hierarchical RCU debugfs Files and Formats |
@@ -210,9 +35,10 @@ rcu_bh: | |||
210 | 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 | 35 | 6 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=859/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
211 | 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 | 36 | 7 c=-275 g=-275 pq=1 pqc=-275 qp=0 dt=3761/1 dn=0 df=15 of=0 ri=0 ql=0 b=10 |
212 | 37 | ||
213 | The first section lists the rcu_data structures for rcu, the second for | 38 | The first section lists the rcu_data structures for rcu_sched, the second |
214 | rcu_bh. Each section has one line per CPU, or eight for this 8-CPU system. | 39 | for rcu_bh. Note that CONFIG_TREE_PREEMPT_RCU kernels will have an |
215 | The fields are as follows: | 40 | additional section for rcu_preempt. Each section has one line per CPU, |
41 | or eight for this 8-CPU system. The fields are as follows: | ||
216 | 42 | ||
217 | o The number at the beginning of each line is the CPU number. | 43 | o The number at the beginning of each line is the CPU number. |
218 | CPUs numbers followed by an exclamation mark are offline, | 44 | CPUs numbers followed by an exclamation mark are offline, |
@@ -223,9 +49,9 @@ o The number at the beginning of each line is the CPU number. | |||
223 | 49 | ||
224 | o "c" is the count of grace periods that this CPU believes have | 50 | o "c" is the count of grace periods that this CPU believes have |
225 | completed. CPUs in dynticks idle mode may lag quite a ways | 51 | completed. CPUs in dynticks idle mode may lag quite a ways |
226 | behind, for example, CPU 4 under "rcu" above, which has slept | 52 | behind, for example, CPU 4 under "rcu_sched" above, which has |
227 | through the past 25 RCU grace periods. It is not unusual to | 53 | slept through the past 25 RCU grace periods. It is not unusual |
228 | see CPUs lagging by thousands of grace periods. | 54 | to see CPUs lagging by thousands of grace periods. |
229 | 55 | ||
230 | o "g" is the count of grace periods that this CPU believes have | 56 | o "g" is the count of grace periods that this CPU believes have |
231 | started. Again, CPUs in dynticks idle mode may lag behind. | 57 | started. Again, CPUs in dynticks idle mode may lag behind. |
@@ -308,8 +134,10 @@ The output of "cat rcu/rcugp" looks as follows: | |||
308 | rcu_sched: completed=33062 gpnum=33063 | 134 | rcu_sched: completed=33062 gpnum=33063 |
309 | rcu_bh: completed=464 gpnum=464 | 135 | rcu_bh: completed=464 gpnum=464 |
310 | 136 | ||
311 | Again, this output is for both "rcu" and "rcu_bh". The fields are | 137 | Again, this output is for both "rcu_sched" and "rcu_bh". Note that |
312 | taken from the rcu_state structure, and are as follows: | 138 | kernels built with CONFIG_TREE_PREEMPT_RCU will have an additional |
139 | "rcu_preempt" line. The fields are taken from the rcu_state structure, | ||
140 | and are as follows: | ||
313 | 141 | ||
314 | o "completed" is the number of grace periods that have completed. | 142 | o "completed" is the number of grace periods that have completed. |
315 | It is comparable to the "c" field from rcu/rcudata in that a | 143 | It is comparable to the "c" field from rcu/rcudata in that a |
@@ -324,23 +152,24 @@ o "gpnum" is the number of grace periods that have started. It is | |||
324 | If these two fields are equal (as they are for "rcu_bh" above), | 152 | If these two fields are equal (as they are for "rcu_bh" above), |
325 | then there is no grace period in progress, in other words, RCU | 153 | then there is no grace period in progress, in other words, RCU |
326 | is idle. On the other hand, if the two fields differ (as they | 154 | is idle. On the other hand, if the two fields differ (as they |
327 | do for "rcu" above), then an RCU grace period is in progress. | 155 | do for "rcu_sched" above), then an RCU grace period is in progress. |
328 | 156 | ||
329 | 157 | ||
330 | The output of "cat rcu/rcuhier" looks as follows, with very long lines: | 158 | The output of "cat rcu/rcuhier" looks as follows, with very long lines: |
331 | 159 | ||
332 | c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 | 160 | c=6902 g=6903 s=2 jfq=3 j=72c7 nfqs=13142/nfqsng=0(13142) fqlh=6 oqlen=0 |
333 | 1/1 0:127 ^0 | 161 | 1/1 .>. 0:127 ^0 |
334 | 3/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 | 162 | 3/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 |
335 | 3/3f 0:5 ^0 2/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 | 163 | 3/3f .>. 0:5 ^0 2/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 |
336 | rcu_bh: | 164 | rcu_bh: |
337 | c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 | 165 | c=-226 g=-226 s=1 jfq=-5701 j=72c7 nfqs=88/nfqsng=0(88) fqlh=0 oqlen=0 |
338 | 0/1 0:127 ^0 | 166 | 0/1 .>. 0:127 ^0 |
339 | 0/3 0:35 ^0 0/0 36:71 ^1 0/0 72:107 ^2 0/0 108:127 ^3 | 167 | 0/3 .>. 0:35 ^0 0/0 .>. 36:71 ^1 0/0 .>. 72:107 ^2 0/0 .>. 108:127 ^3 |
340 | 0/3f 0:5 ^0 0/3 6:11 ^1 0/0 12:17 ^2 0/0 18:23 ^3 0/0 24:29 ^4 0/0 30:35 ^5 0/0 36:41 ^0 0/0 42:47 ^1 0/0 48:53 ^2 0/0 54:59 ^3 0/0 60:65 ^4 0/0 66:71 ^5 0/0 72:77 ^0 0/0 78:83 ^1 0/0 84:89 ^2 0/0 90:95 ^3 0/0 96:101 ^4 0/0 102:107 ^5 0/0 108:113 ^0 0/0 114:119 ^1 0/0 120:125 ^2 0/0 126:127 ^3 | 168 | 0/3f .>. 0:5 ^0 0/3 .>. 6:11 ^1 0/0 .>. 12:17 ^2 0/0 .>. 18:23 ^3 0/0 .>. 24:29 ^4 0/0 .>. 30:35 ^5 0/0 .>. 36:41 ^0 0/0 .>. 42:47 ^1 0/0 .>. 48:53 ^2 0/0 .>. 54:59 ^3 0/0 .>. 60:65 ^4 0/0 .>. 66:71 ^5 0/0 .>. 72:77 ^0 0/0 .>. 78:83 ^1 0/0 .>. 84:89 ^2 0/0 .>. 90:95 ^3 0/0 .>. 96:101 ^4 0/0 .>. 102:107 ^5 0/0 .>. 108:113 ^0 0/0 .>. 114:119 ^1 0/0 .>. 120:125 ^2 0/0 .>. 126:127 ^3 |
341 | 169 | ||
342 | This is once again split into "rcu" and "rcu_bh" portions. The fields are | 170 | This is once again split into "rcu_sched" and "rcu_bh" portions, |
343 | as follows: | 171 | and CONFIG_TREE_PREEMPT_RCU kernels will again have an additional |
172 | "rcu_preempt" section. The fields are as follows: | ||
344 | 173 | ||
345 | o "c" is exactly the same as "completed" under rcu/rcugp. | 174 | o "c" is exactly the same as "completed" under rcu/rcugp. |
346 | 175 | ||
@@ -372,6 +201,11 @@ o "fqlh" is the number of calls to force_quiescent_state() that | |||
372 | exited immediately (without even being counted in nfqs above) | 201 | exited immediately (without even being counted in nfqs above) |
373 | due to contention on ->fqslock. | 202 | due to contention on ->fqslock. |
374 | 203 | ||
204 | o "oqlen" is the number of callbacks on the "orphan" callback | ||
205 | list. RCU callbacks are placed on this list by CPUs going | ||
206 | offline, and are "adopted" either by the CPU helping the outgoing | ||
207 | CPU or by the next rcu_barrier*() call, whichever comes first. | ||
208 | |||
375 | o Each element of the form "1/1 0:127 ^0" represents one struct | 209 | o Each element of the form "1/1 0:127 ^0" represents one struct |
376 | rcu_node. Each line represents one level of the hierarchy, from | 210 | rcu_node. Each line represents one level of the hierarchy, from |
377 | root to leaves. It is best to think of the rcu_data structures | 211 | root to leaves. It is best to think of the rcu_data structures |
@@ -379,7 +213,7 @@ o Each element of the form "1/1 0:127 ^0" represents one struct | |||
379 | might be either one, two, or three levels of rcu_node structures, | 213 | might be either one, two, or three levels of rcu_node structures, |
380 | depending on the relationship between CONFIG_RCU_FANOUT and | 214 | depending on the relationship between CONFIG_RCU_FANOUT and |
381 | CONFIG_NR_CPUS. | 215 | CONFIG_NR_CPUS. |
382 | 216 | ||
383 | o The numbers separated by the "/" are the qsmask followed | 217 | o The numbers separated by the "/" are the qsmask followed |
384 | by the qsmaskinit. The qsmask will have one bit | 218 | by the qsmaskinit. The qsmask will have one bit |
385 | set for each entity in the next lower level that | 219 | set for each entity in the next lower level that |
@@ -389,10 +223,19 @@ o Each element of the form "1/1 0:127 ^0" represents one struct | |||
389 | The value of qsmaskinit is assigned to that of qsmask | 223 | The value of qsmaskinit is assigned to that of qsmask |
390 | at the beginning of each grace period. | 224 | at the beginning of each grace period. |
391 | 225 | ||
392 | For example, for "rcu", the qsmask of the first entry | 226 | For example, for "rcu_sched", the qsmask of the first |
393 | of the lowest level is 0x14, meaning that we are still | 227 | entry of the lowest level is 0x14, meaning that we |
394 | waiting for CPUs 2 and 4 to check in for the current | 228 | are still waiting for CPUs 2 and 4 to check in for the |
395 | grace period. | 229 | current grace period. |
230 | |||
231 | o The characters separated by the ">" indicate the state | ||
232 | of the blocked-tasks lists. A "T" preceding the ">" | ||
233 | indicates that at least one task blocked in an RCU | ||
234 | read-side critical section blocks the current grace | ||
235 | period, while a "." preceding the ">" indicates otherwise. | ||
236 | The character following the ">" indicates similarly for | ||
237 | the next grace period. A "T" should appear in this | ||
238 | field only for rcu-preempt. | ||
396 | 239 | ||
397 | o The numbers separated by the ":" are the range of CPUs | 240 | o The numbers separated by the ":" are the range of CPUs |
398 | served by this struct rcu_node. This can be helpful | 241 | served by this struct rcu_node. This can be helpful |
@@ -431,8 +274,9 @@ rcu_bh: | |||
431 | 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 | 274 | 6 np=120834 qsp=9902 cbr=0 cng=0 gpc=6 gps=3 nf=2 nn=110921 |
432 | 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 | 275 | 7 np=144888 qsp=26336 cbr=0 cng=0 gpc=8 gps=2 nf=0 nn=118542 |
433 | 276 | ||
434 | As always, this is once again split into "rcu" and "rcu_bh" portions. | 277 | As always, this is once again split into "rcu_sched" and "rcu_bh" |
435 | The fields are as follows: | 278 | portions, with CONFIG_TREE_PREEMPT_RCU kernels having an additional |
279 | "rcu_preempt" section. The fields are as follows: | ||
436 | 280 | ||
437 | o "np" is the number of times that __rcu_pending() has been invoked | 281 | o "np" is the number of times that __rcu_pending() has been invoked |
438 | for the corresponding flavor of RCU. | 282 | for the corresponding flavor of RCU. |
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index e41a7fecf0d3..cfaac34c4557 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
@@ -323,14 +323,17 @@ used as follows: | |||
323 | Defer Protect | 323 | Defer Protect |
324 | 324 | ||
325 | a. synchronize_rcu() rcu_read_lock() / rcu_read_unlock() | 325 | a. synchronize_rcu() rcu_read_lock() / rcu_read_unlock() |
326 | call_rcu() | 326 | call_rcu() rcu_dereference() |
327 | 327 | ||
328 | b. call_rcu_bh() rcu_read_lock_bh() / rcu_read_unlock_bh() | 328 | b. call_rcu_bh() rcu_read_lock_bh() / rcu_read_unlock_bh() |
329 | rcu_dereference_bh() | ||
329 | 330 | ||
330 | c. synchronize_sched() preempt_disable() / preempt_enable() | 331 | c. synchronize_sched() rcu_read_lock_sched() / rcu_read_unlock_sched() |
332 | preempt_disable() / preempt_enable() | ||
331 | local_irq_save() / local_irq_restore() | 333 | local_irq_save() / local_irq_restore() |
332 | hardirq enter / hardirq exit | 334 | hardirq enter / hardirq exit |
333 | NMI enter / NMI exit | 335 | NMI enter / NMI exit |
336 | rcu_dereference_sched() | ||
334 | 337 | ||
335 | These three mechanisms are used as follows: | 338 | These three mechanisms are used as follows: |
336 | 339 | ||
@@ -780,9 +783,8 @@ Linux-kernel source code, but it helps to have a full list of the | |||
780 | APIs, since there does not appear to be a way to categorize them | 783 | APIs, since there does not appear to be a way to categorize them |
781 | in docbook. Here is the list, by category. | 784 | in docbook. Here is the list, by category. |
782 | 785 | ||
783 | RCU pointer/list traversal: | 786 | RCU list traversal: |
784 | 787 | ||
785 | rcu_dereference | ||
786 | list_for_each_entry_rcu | 788 | list_for_each_entry_rcu |
787 | hlist_for_each_entry_rcu | 789 | hlist_for_each_entry_rcu |
788 | hlist_nulls_for_each_entry_rcu | 790 | hlist_nulls_for_each_entry_rcu |
@@ -808,7 +810,7 @@ RCU: Critical sections Grace period Barrier | |||
808 | 810 | ||
809 | rcu_read_lock synchronize_net rcu_barrier | 811 | rcu_read_lock synchronize_net rcu_barrier |
810 | rcu_read_unlock synchronize_rcu | 812 | rcu_read_unlock synchronize_rcu |
811 | synchronize_rcu_expedited | 813 | rcu_dereference synchronize_rcu_expedited |
812 | call_rcu | 814 | call_rcu |
813 | 815 | ||
814 | 816 | ||
@@ -816,7 +818,7 @@ bh: Critical sections Grace period Barrier | |||
816 | 818 | ||
817 | rcu_read_lock_bh call_rcu_bh rcu_barrier_bh | 819 | rcu_read_lock_bh call_rcu_bh rcu_barrier_bh |
818 | rcu_read_unlock_bh synchronize_rcu_bh | 820 | rcu_read_unlock_bh synchronize_rcu_bh |
819 | synchronize_rcu_bh_expedited | 821 | rcu_dereference_bh synchronize_rcu_bh_expedited |
820 | 822 | ||
821 | 823 | ||
822 | sched: Critical sections Grace period Barrier | 824 | sched: Critical sections Grace period Barrier |
@@ -825,17 +827,25 @@ sched: Critical sections Grace period Barrier | |||
825 | rcu_read_unlock_sched call_rcu_sched | 827 | rcu_read_unlock_sched call_rcu_sched |
826 | [preempt_disable] synchronize_sched_expedited | 828 | [preempt_disable] synchronize_sched_expedited |
827 | [and friends] | 829 | [and friends] |
830 | rcu_dereference_sched | ||
828 | 831 | ||
829 | 832 | ||
830 | SRCU: Critical sections Grace period Barrier | 833 | SRCU: Critical sections Grace period Barrier |
831 | 834 | ||
832 | srcu_read_lock synchronize_srcu N/A | 835 | srcu_read_lock synchronize_srcu N/A |
833 | srcu_read_unlock | 836 | srcu_read_unlock synchronize_srcu_expedited |
837 | srcu_dereference | ||
834 | 838 | ||
835 | SRCU: Initialization/cleanup | 839 | SRCU: Initialization/cleanup |
836 | init_srcu_struct | 840 | init_srcu_struct |
837 | cleanup_srcu_struct | 841 | cleanup_srcu_struct |
838 | 842 | ||
843 | All: lockdep-checked RCU-protected pointer access | ||
844 | |||
845 | rcu_dereference_check | ||
846 | rcu_dereference_protected | ||
847 | rcu_access_pointer | ||
848 | |||
839 | See the comment headers in the source code (or the docbook generated | 849 | See the comment headers in the source code (or the docbook generated |
840 | from them) for more information. | 850 | from them) for more information. |
841 | 851 | ||