diff options
author | Paul E. McKenney <paulmck@us.ibm.com> | 2005-05-01 11:59:05 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-05-01 11:59:05 -0400 |
commit | a83f1fe27f7252a2b73b4f22066e92bf99bd595b (patch) | |
tree | b6c4dd485fff268f86f396a85aad88cf29783dcd | |
parent | fbd568a3e61a7decb8a754ad952aaa5b5c82e9e5 (diff) |
[PATCH] Update RCU documentation
Update the RCU documentation to allow for the new synchronize_rcu() and
synchronize_sched() primitives. Fix a few other nits as well.
Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r-- | Documentation/RCU/RTFP.txt | 29 | ||||
-rw-r--r-- | Documentation/RCU/UP.txt | 8 | ||||
-rw-r--r-- | Documentation/RCU/checklist.txt | 47 | ||||
-rw-r--r-- | Documentation/RCU/listRCU.txt | 13 | ||||
-rw-r--r-- | Documentation/RCU/rcu.txt | 4 |
5 files changed, 77 insertions, 24 deletions
diff --git a/Documentation/RCU/RTFP.txt b/Documentation/RCU/RTFP.txt index 12250b342e1f..9c6d450138ea 100644 --- a/Documentation/RCU/RTFP.txt +++ b/Documentation/RCU/RTFP.txt | |||
@@ -108,8 +108,9 @@ year saw a paper describing an RCU implementation of System V IPC | |||
108 | 2004 has seen a Linux-Journal article on use of RCU in dcache | 108 | 2004 has seen a Linux-Journal article on use of RCU in dcache |
109 | [McKenney04a], a performance comparison of locking to RCU on several | 109 | [McKenney04a], a performance comparison of locking to RCU on several |
110 | different CPUs [McKenney04b], a dissertation describing use of RCU in a | 110 | different CPUs [McKenney04b], a dissertation describing use of RCU in a |
111 | number of operating-system kernels [PaulEdwardMcKenneyPhD], and a paper | 111 | number of operating-system kernels [PaulEdwardMcKenneyPhD], a paper |
112 | describing how to make RCU safe for soft-realtime applications [Sarma04c]. | 112 | describing how to make RCU safe for soft-realtime applications [Sarma04c], |
113 | and a paper describing SELinux performance with RCU [JamesMorris04b]. | ||
113 | 114 | ||
114 | 115 | ||
115 | Bibtex Entries | 116 | Bibtex Entries |
@@ -341,6 +342,17 @@ Dipankar Sarma" | |||
341 | ,pages="18-26" | 342 | ,pages="18-26" |
342 | } | 343 | } |
343 | 344 | ||
345 | @techreport{Friedberg03a | ||
346 | ,author="Stuart A. Friedberg" | ||
347 | ,title="Lock-Free Wild Card Search Data Structure and Method" | ||
348 | ,institution="US Patent and Trademark Office" | ||
349 | ,address="Washington, DC" | ||
350 | ,year="2003" | ||
351 | ,number="US Patent 6,662,184 (contributed under GPL)" | ||
352 | ,month="December" | ||
353 | ,pages="112" | ||
354 | } | ||
355 | |||
344 | @article{McKenney04a | 356 | @article{McKenney04a |
345 | ,author="Paul E. McKenney and Dipankar Sarma and Maneesh Soni" | 357 | ,author="Paul E. McKenney and Dipankar Sarma and Maneesh Soni" |
346 | ,title="Scaling dcache with {RCU}" | 358 | ,title="Scaling dcache with {RCU}" |
@@ -373,6 +385,9 @@ in Operating System Kernels" | |||
373 | ,school="OGI School of Science and Engineering at | 385 | ,school="OGI School of Science and Engineering at |
374 | Oregon Health and Sciences University" | 386 | Oregon Health and Sciences University" |
375 | ,year="2004" | 387 | ,year="2004" |
388 | ,note="Available: | ||
389 | \url{http://www.rdrop.com/users/paulmck/RCU/RCUdissertation.2004.07.14e1.pdf} | ||
390 | [Viewed October 15, 2004]" | ||
376 | } | 391 | } |
377 | 392 | ||
378 | @Conference{Sarma04c | 393 | @Conference{Sarma04c |
@@ -385,3 +400,13 @@ Oregon Health and Sciences University" | |||
385 | ,month="June" | 400 | ,month="June" |
386 | ,pages="182-191" | 401 | ,pages="182-191" |
387 | } | 402 | } |
403 | |||
404 | @unpublished{JamesMorris04b | ||
405 | ,Author="James Morris" | ||
406 | ,Title="Recent Developments in {SELinux} Kernel Performance" | ||
407 | ,month="December" | ||
408 | ,year="2004" | ||
409 | ,note="Available: | ||
410 | \url{http://www.livejournal.com/users/james_morris/2153.html} | ||
411 | [Viewed December 10, 2004]" | ||
412 | } | ||
diff --git a/Documentation/RCU/UP.txt b/Documentation/RCU/UP.txt index 551a803d82a8..3bfb84b3b7db 100644 --- a/Documentation/RCU/UP.txt +++ b/Documentation/RCU/UP.txt | |||
@@ -2,11 +2,11 @@ RCU on Uniprocessor Systems | |||
2 | 2 | ||
3 | 3 | ||
4 | A common misconception is that, on UP systems, the call_rcu() primitive | 4 | A common misconception is that, on UP systems, the call_rcu() primitive |
5 | may immediately invoke its function, and that the synchronize_kernel | 5 | may immediately invoke its function, and that the synchronize_rcu() |
6 | primitive may return immediately. The basis of this misconception | 6 | primitive may return immediately. The basis of this misconception |
7 | is that since there is only one CPU, it should not be necessary to | 7 | is that since there is only one CPU, it should not be necessary to |
8 | wait for anything else to get done, since there are no other CPUs for | 8 | wait for anything else to get done, since there are no other CPUs for |
9 | anything else to be happening on. Although this approach will sort of | 9 | anything else to be happening on. Although this approach will -sort- -of- |
10 | work a surprising amount of the time, it is a very bad idea in general. | 10 | work a surprising amount of the time, it is a very bad idea in general. |
11 | This document presents two examples that demonstrate exactly how bad an | 11 | This document presents two examples that demonstrate exactly how bad an |
12 | idea this is. | 12 | idea this is. |
@@ -44,14 +44,14 @@ its arguments would cause it to fail to make the fundamental guarantee | |||
44 | underlying RCU, namely that call_rcu() defers invoking its arguments until | 44 | underlying RCU, namely that call_rcu() defers invoking its arguments until |
45 | all RCU read-side critical sections currently executing have completed. | 45 | all RCU read-side critical sections currently executing have completed. |
46 | 46 | ||
47 | Quick Quiz: why is it -not- legal to invoke synchronize_kernel() in | 47 | Quick Quiz: why is it -not- legal to invoke synchronize_rcu() in |
48 | this case? | 48 | this case? |
49 | 49 | ||
50 | 50 | ||
51 | Summary | 51 | Summary |
52 | 52 | ||
53 | Permitting call_rcu() to immediately invoke its arguments or permitting | 53 | Permitting call_rcu() to immediately invoke its arguments or permitting |
54 | synchronize_kernel() to immediately return breaks RCU, even on a UP system. | 54 | synchronize_rcu() to immediately return breaks RCU, even on a UP system. |
55 | So do not do it! Even on a UP system, the RCU infrastructure -must- | 55 | So do not do it! Even on a UP system, the RCU infrastructure -must- |
56 | respect grace periods. | 56 | respect grace periods. |
57 | 57 | ||
diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index b3a568abe6b1..8f3fb77c9cd3 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt | |||
@@ -32,7 +32,10 @@ over a rather long period of time, but improvements are always welcome! | |||
32 | them -- even x86 allows reads to be reordered), and be prepared | 32 | them -- even x86 allows reads to be reordered), and be prepared |
33 | to explain why this added complexity is worthwhile. If you | 33 | to explain why this added complexity is worthwhile. If you |
34 | choose #c, be prepared to explain how this single task does not | 34 | choose #c, be prepared to explain how this single task does not |
35 | become a major bottleneck on big multiprocessor machines. | 35 | become a major bottleneck on big multiprocessor machines (for |
36 | example, if the task is updating information relating to itself | ||
37 | that other tasks can read, there by definition can be no | ||
38 | bottleneck). | ||
36 | 39 | ||
37 | 2. Do the RCU read-side critical sections make proper use of | 40 | 2. Do the RCU read-side critical sections make proper use of |
38 | rcu_read_lock() and friends? These primitives are needed | 41 | rcu_read_lock() and friends? These primitives are needed |
@@ -89,27 +92,34 @@ over a rather long period of time, but improvements are always welcome! | |||
89 | "_rcu()" list-traversal primitives, such as the | 92 | "_rcu()" list-traversal primitives, such as the |
90 | list_for_each_entry_rcu(). | 93 | list_for_each_entry_rcu(). |
91 | 94 | ||
92 | b. If the list macros are being used, the list_del_rcu(), | 95 | b. If the list macros are being used, the list_add_tail_rcu() |
93 | list_add_tail_rcu(), and list_del_rcu() primitives must | 96 | and list_add_rcu() primitives must be used in order |
94 | be used in order to prevent weakly ordered machines from | 97 | to prevent weakly ordered machines from misordering |
95 | misordering structure initialization and pointer planting. | 98 | structure initialization and pointer planting. |
96 | Similarly, if the hlist macros are being used, the | 99 | Similarly, if the hlist macros are being used, the |
97 | hlist_del_rcu() and hlist_add_head_rcu() primitives | 100 | hlist_add_head_rcu() primitive is required. |
98 | are required. | ||
99 | 101 | ||
100 | c. Updates must ensure that initialization of a given | 102 | c. If the list macros are being used, the list_del_rcu() |
103 | primitive must be used to keep list_del()'s pointer | ||
104 | poisoning from inflicting toxic effects on concurrent | ||
105 | readers. Similarly, if the hlist macros are being used, | ||
106 | the hlist_del_rcu() primitive is required. | ||
107 | |||
108 | The list_replace_rcu() primitive may be used to | ||
109 | replace an old structure with a new one in an | ||
110 | RCU-protected list. | ||
111 | |||
112 | d. Updates must ensure that initialization of a given | ||
101 | structure happens before pointers to that structure are | 113 | structure happens before pointers to that structure are |
102 | publicized. Use the rcu_assign_pointer() primitive | 114 | publicized. Use the rcu_assign_pointer() primitive |
103 | when publicizing a pointer to a structure that can | 115 | when publicizing a pointer to a structure that can |
104 | be traversed by an RCU read-side critical section. | 116 | be traversed by an RCU read-side critical section. |
105 | 117 | ||
106 | [The rcu_assign_pointer() primitive is in process.] | ||
107 | |||
108 | 5. If call_rcu(), or a related primitive such as call_rcu_bh(), | 118 | 5. If call_rcu(), or a related primitive such as call_rcu_bh(), |
109 | is used, the callback function must be written to be called | 119 | is used, the callback function must be written to be called |
110 | from softirq context. In particular, it cannot block. | 120 | from softirq context. In particular, it cannot block. |
111 | 121 | ||
112 | 6. Since synchronize_kernel() blocks, it cannot be called from | 122 | 6. Since synchronize_rcu() can block, it cannot be called from |
113 | any sort of irq context. | 123 | any sort of irq context. |
114 | 124 | ||
115 | 7. If the updater uses call_rcu(), then the corresponding readers | 125 | 7. If the updater uses call_rcu(), then the corresponding readers |
@@ -125,9 +135,9 @@ over a rather long period of time, but improvements are always welcome! | |||
125 | such cases is a must, of course! And the jury is still out on | 135 | such cases is a must, of course! And the jury is still out on |
126 | whether the increased speed is worth it. | 136 | whether the increased speed is worth it. |
127 | 137 | ||
128 | 8. Although synchronize_kernel() is a bit slower than is call_rcu(), | 138 | 8. Although synchronize_rcu() is a bit slower than is call_rcu(), |
129 | it usually results in simpler code. So, unless update performance | 139 | it usually results in simpler code. So, unless update performance |
130 | is important or the updaters cannot block, synchronize_kernel() | 140 | is important or the updaters cannot block, synchronize_rcu() |
131 | should be used in preference to call_rcu(). | 141 | should be used in preference to call_rcu(). |
132 | 142 | ||
133 | 9. All RCU list-traversal primitives, which include | 143 | 9. All RCU list-traversal primitives, which include |
@@ -155,3 +165,14 @@ over a rather long period of time, but improvements are always welcome! | |||
155 | you -must- use the "_rcu()" variants of the list macros. | 165 | you -must- use the "_rcu()" variants of the list macros. |
156 | Failing to do so will break Alpha and confuse people reading | 166 | Failing to do so will break Alpha and confuse people reading |
157 | your code. | 167 | your code. |
168 | |||
169 | 11. Note that synchronize_rcu() -only- guarantees to wait until | ||
170 | all currently executing rcu_read_lock()-protected RCU read-side | ||
171 | critical sections complete. It does -not- necessarily guarantee | ||
172 | that all currently running interrupts, NMIs, preempt_disable() | ||
173 | code, or idle loops will complete. Therefore, if you do not have | ||
174 | rcu_read_lock()-protected read-side critical sections, do -not- | ||
175 | use synchronize_rcu(). | ||
176 | |||
177 | If you want to wait for some of these other things, you might | ||
178 | instead need to use synchronize_irq() or synchronize_sched(). | ||
diff --git a/Documentation/RCU/listRCU.txt b/Documentation/RCU/listRCU.txt index bda6ead69bd0..f8a54fa0d8ab 100644 --- a/Documentation/RCU/listRCU.txt +++ b/Documentation/RCU/listRCU.txt | |||
@@ -32,6 +32,7 @@ implementation of audit_filter_task() might be as follows: | |||
32 | enum audit_state state; | 32 | enum audit_state state; |
33 | 33 | ||
34 | read_lock(&auditsc_lock); | 34 | read_lock(&auditsc_lock); |
35 | /* Note: audit_netlink_sem held by caller. */ | ||
35 | list_for_each_entry(e, &audit_tsklist, list) { | 36 | list_for_each_entry(e, &audit_tsklist, list) { |
36 | if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { | 37 | if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { |
37 | read_unlock(&auditsc_lock); | 38 | read_unlock(&auditsc_lock); |
@@ -55,6 +56,7 @@ This means that RCU can be easily applied to the read side, as follows: | |||
55 | enum audit_state state; | 56 | enum audit_state state; |
56 | 57 | ||
57 | rcu_read_lock(); | 58 | rcu_read_lock(); |
59 | /* Note: audit_netlink_sem held by caller. */ | ||
58 | list_for_each_entry_rcu(e, &audit_tsklist, list) { | 60 | list_for_each_entry_rcu(e, &audit_tsklist, list) { |
59 | if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { | 61 | if (audit_filter_rules(tsk, &e->rule, NULL, &state)) { |
60 | rcu_read_unlock(); | 62 | rcu_read_unlock(); |
@@ -139,12 +141,15 @@ Normally, the write_lock() and write_unlock() would be replaced by | |||
139 | a spin_lock() and a spin_unlock(), but in this case, all callers hold | 141 | a spin_lock() and a spin_unlock(), but in this case, all callers hold |
140 | audit_netlink_sem, so no additional locking is required. The auditsc_lock | 142 | audit_netlink_sem, so no additional locking is required. The auditsc_lock |
141 | can therefore be eliminated, since use of RCU eliminates the need for | 143 | can therefore be eliminated, since use of RCU eliminates the need for |
142 | writers to exclude readers. | 144 | writers to exclude readers. Normally, the write_lock() calls would |
145 | be converted into spin_lock() calls. | ||
143 | 146 | ||
144 | The list_del(), list_add(), and list_add_tail() primitives have been | 147 | The list_del(), list_add(), and list_add_tail() primitives have been |
145 | replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu(). | 148 | replaced by list_del_rcu(), list_add_rcu(), and list_add_tail_rcu(). |
146 | The _rcu() list-manipulation primitives add memory barriers that are | 149 | The _rcu() list-manipulation primitives add memory barriers that are |
147 | needed on weakly ordered CPUs (most of them!). | 150 | needed on weakly ordered CPUs (most of them!). The list_del_rcu() |
151 | primitive omits the pointer poisoning debug-assist code that would | ||
152 | otherwise cause concurrent readers to fail spectacularly. | ||
148 | 153 | ||
149 | So, when readers can tolerate stale data and when entries are either added | 154 | So, when readers can tolerate stale data and when entries are either added |
150 | or deleted, without in-place modification, it is very easy to use RCU! | 155 | or deleted, without in-place modification, it is very easy to use RCU! |
@@ -166,6 +171,7 @@ otherwise, the added fields would need to be filled in): | |||
166 | struct audit_newentry *ne; | 171 | struct audit_newentry *ne; |
167 | 172 | ||
168 | write_lock(&auditsc_lock); | 173 | write_lock(&auditsc_lock); |
174 | /* Note: audit_netlink_sem held by caller. */ | ||
169 | list_for_each_entry(e, list, list) { | 175 | list_for_each_entry(e, list, list) { |
170 | if (!audit_compare_rule(rule, &e->rule)) { | 176 | if (!audit_compare_rule(rule, &e->rule)) { |
171 | e->rule.action = newaction; | 177 | e->rule.action = newaction; |
@@ -199,8 +205,7 @@ RCU ("read-copy update") its name. The RCU code is as follows: | |||
199 | audit_copy_rule(&ne->rule, &e->rule); | 205 | audit_copy_rule(&ne->rule, &e->rule); |
200 | ne->rule.action = newaction; | 206 | ne->rule.action = newaction; |
201 | ne->rule.file_count = newfield_count; | 207 | ne->rule.file_count = newfield_count; |
202 | list_add_rcu(ne, e); | 208 | list_replace_rcu(e, ne); |
203 | list_del(e); | ||
204 | call_rcu(&e->rcu, audit_free_rule, e); | 209 | call_rcu(&e->rcu, audit_free_rule, e); |
205 | return 0; | 210 | return 0; |
206 | } | 211 | } |
diff --git a/Documentation/RCU/rcu.txt b/Documentation/RCU/rcu.txt index 7e0c2ab6f2bd..eb444006683e 100644 --- a/Documentation/RCU/rcu.txt +++ b/Documentation/RCU/rcu.txt | |||
@@ -43,7 +43,9 @@ o If I am running on a uniprocessor kernel, which can only do one | |||
43 | 43 | ||
44 | o How can I see where RCU is currently used in the Linux kernel? | 44 | o How can I see where RCU is currently used in the Linux kernel? |
45 | 45 | ||
46 | Search for "rcu_read_lock", "call_rcu", and "synchronize_kernel". | 46 | Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu", |
47 | "rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh", | ||
48 | "synchronize_rcu", and "synchronize_net". | ||
47 | 49 | ||
48 | o What guidelines should I follow when writing code that uses RCU? | 50 | o What guidelines should I follow when writing code that uses RCU? |
49 | 51 | ||