diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 11:10:07 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-03-16 11:10:07 -0400 |
| commit | 016aa2ed1cc9cf704cf76d8df07751b6daa9750f (patch) | |
| tree | bebfea796fbcaed6995f41cb4ab1333a0e09a1ff | |
| parent | 34d211a2d5df4984a35b18d8ccacbe1d10abb067 (diff) | |
| parent | 241e6663b5151733294d1a230a3fd8a4d32e187f (diff) | |
Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
smp: Document transitivity for memory barriers.
rcu: add comment saying why DEBUG_OBJECTS_RCU_HEAD depends on PREEMPT.
rcupdate: remove dead code
rcu: add documentation saying which RCU flavor to choose
rcutorture: Get rid of duplicate sched.h include
rcu: call __rcu_read_unlock() in exit_rcu for tiny RCU
| -rw-r--r-- | Documentation/RCU/whatisRCU.txt | 31 | ||||
| -rw-r--r-- | Documentation/memory-barriers.txt | 58 | ||||
| -rw-r--r-- | kernel/rcupdate.c | 10 | ||||
| -rw-r--r-- | kernel/rcutiny_plugin.h | 2 | ||||
| -rw-r--r-- | kernel/rcutorture.c | 1 |
5 files changed, 95 insertions, 7 deletions
diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index cfaac34c455..6ef692667e2 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt | |||
| @@ -849,6 +849,37 @@ All: lockdep-checked RCU-protected pointer access | |||
| 849 | See the comment headers in the source code (or the docbook generated | 849 | See the comment headers in the source code (or the docbook generated |
| 850 | from them) for more information. | 850 | from them) for more information. |
| 851 | 851 | ||
| 852 | However, given that there are no fewer than four families of RCU APIs | ||
| 853 | in the Linux kernel, how do you choose which one to use? The following | ||
| 854 | list can be helpful: | ||
| 855 | |||
| 856 | a. Will readers need to block? If so, you need SRCU. | ||
| 857 | |||
| 858 | b. What about the -rt patchset? If readers would need to block | ||
| 859 | in a non-rt kernel, you need SRCU. If readers would block | ||
| 860 | in a -rt kernel, but not in a non-rt kernel, SRCU is not | ||
| 861 | necessary. | ||
| 862 | |||
| 863 | c. Do you need to treat NMI handlers, hardirq handlers, | ||
| 864 | and code segments with preemption disabled (whether | ||
| 865 | via preempt_disable(), local_irq_save(), local_bh_disable(), | ||
| 866 | or some other mechanism) as if they were explicit RCU readers? | ||
| 867 | If so, you need RCU-sched. | ||
| 868 | |||
| 869 | d. Do you need RCU grace periods to complete even in the face | ||
| 870 | of softirq monopolization of one or more of the CPUs? For | ||
| 871 | example, is your code subject to network-based denial-of-service | ||
| 872 | attacks? If so, you need RCU-bh. | ||
| 873 | |||
| 874 | e. Is your workload too update-intensive for normal use of | ||
| 875 | RCU, but inappropriate for other synchronization mechanisms? | ||
| 876 | If so, consider SLAB_DESTROY_BY_RCU. But please be careful! | ||
| 877 | |||
| 878 | f. Otherwise, use RCU. | ||
| 879 | |||
| 880 | Of course, this all assumes that you have determined that RCU is in fact | ||
| 881 | the right tool for your job. | ||
| 882 | |||
| 852 | 883 | ||
| 853 | 8. ANSWERS TO QUICK QUIZZES | 884 | 8. ANSWERS TO QUICK QUIZZES |
| 854 | 885 | ||
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index 631ad2f1b22..f0d3a8026a5 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt | |||
| @@ -21,6 +21,7 @@ Contents: | |||
| 21 | - SMP barrier pairing. | 21 | - SMP barrier pairing. |
| 22 | - Examples of memory barrier sequences. | 22 | - Examples of memory barrier sequences. |
| 23 | - Read memory barriers vs load speculation. | 23 | - Read memory barriers vs load speculation. |
| 24 | - Transitivity. | ||
| 24 | 25 | ||
| 25 | (*) Explicit kernel barriers. | 26 | (*) Explicit kernel barriers. |
| 26 | 27 | ||
| @@ -959,6 +960,63 @@ the speculation will be cancelled and the value reloaded: | |||
| 959 | retrieved : : +-------+ | 960 | retrieved : : +-------+ |
| 960 | 961 | ||
| 961 | 962 | ||
| 963 | TRANSITIVITY | ||
| 964 | ------------ | ||
| 965 | |||
| 966 | Transitivity is a deeply intuitive notion about ordering that is not | ||
| 967 | always provided by real computer systems. The following example | ||
| 968 | demonstrates transitivity (also called "cumulativity"): | ||
| 969 | |||
| 970 | CPU 1 CPU 2 CPU 3 | ||
| 971 | ======================= ======================= ======================= | ||
| 972 | { X = 0, Y = 0 } | ||
| 973 | STORE X=1 LOAD X STORE Y=1 | ||
| 974 | <general barrier> <general barrier> | ||
| 975 | LOAD Y LOAD X | ||
| 976 | |||
| 977 | Suppose that CPU 2's load from X returns 1 and its load from Y returns 0. | ||
| 978 | This indicates that CPU 2's load from X in some sense follows CPU 1's | ||
| 979 | store to X and that CPU 2's load from Y in some sense preceded CPU 3's | ||
| 980 | store to Y. The question is then "Can CPU 3's load from X return 0?" | ||
| 981 | |||
| 982 | Because CPU 2's load from X in some sense came after CPU 1's store, it | ||
| 983 | is natural to expect that CPU 3's load from X must therefore return 1. | ||
| 984 | This expectation is an example of transitivity: if a load executing on | ||
| 985 | CPU A follows a load from the same variable executing on CPU B, then | ||
| 986 | CPU A's load must either return the same value that CPU B's load did, | ||
| 987 | or must return some later value. | ||
| 988 | |||
| 989 | In the Linux kernel, use of general memory barriers guarantees | ||
| 990 | transitivity. Therefore, in the above example, if CPU 2's load from X | ||
| 991 | returns 1 and its load from Y returns 0, then CPU 3's load from X must | ||
| 992 | also return 1. | ||
| 993 | |||
| 994 | However, transitivity is -not- guaranteed for read or write barriers. | ||
| 995 | For example, suppose that CPU 2's general barrier in the above example | ||
| 996 | is changed to a read barrier as shown below: | ||
| 997 | |||
| 998 | CPU 1 CPU 2 CPU 3 | ||
| 999 | ======================= ======================= ======================= | ||
| 1000 | { X = 0, Y = 0 } | ||
| 1001 | STORE X=1 LOAD X STORE Y=1 | ||
| 1002 | <read barrier> <general barrier> | ||
| 1003 | LOAD Y LOAD X | ||
| 1004 | |||
| 1005 | This substitution destroys transitivity: in this example, it is perfectly | ||
| 1006 | legal for CPU 2's load from X to return 1, its load from Y to return 0, | ||
| 1007 | and CPU 3's load from X to return 0. | ||
| 1008 | |||
| 1009 | The key point is that although CPU 2's read barrier orders its pair | ||
| 1010 | of loads, it does not guarantee to order CPU 1's store. Therefore, if | ||
| 1011 | this example runs on a system where CPUs 1 and 2 share a store buffer | ||
| 1012 | or a level of cache, CPU 2 might have early access to CPU 1's writes. | ||
| 1013 | General barriers are therefore required to ensure that all CPUs agree | ||
| 1014 | on the combined order of CPU 1's and CPU 2's accesses. | ||
| 1015 | |||
| 1016 | To reiterate, if your code requires transitivity, use general barriers | ||
| 1017 | throughout. | ||
| 1018 | |||
| 1019 | |||
| 962 | ======================== | 1020 | ======================== |
| 963 | EXPLICIT KERNEL BARRIERS | 1021 | EXPLICIT KERNEL BARRIERS |
| 964 | ======================== | 1022 | ======================== |
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index a23a57a976d..f3240e98792 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c | |||
| @@ -214,11 +214,12 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) | |||
| 214 | * Ensure that queued callbacks are all executed. | 214 | * Ensure that queued callbacks are all executed. |
| 215 | * If we detect that we are nested in an RCU read-side critical | 215 | * If we detect that we are nested in an RCU read-side critical |
| 216 | * section, we should simply fail, otherwise we would deadlock. | 216 | * section, we should simply fail, otherwise we would deadlock. |
| 217 | * Note that the machinery to reliably determine whether | ||
| 218 | * or not we are in an RCU read-side critical section | ||
| 219 | * exists only in the preemptible RCU implementations | ||
| 220 | * (TINY_PREEMPT_RCU and TREE_PREEMPT_RCU), which is why | ||
| 221 | * DEBUG_OBJECTS_RCU_HEAD is disallowed if !PREEMPT. | ||
| 217 | */ | 222 | */ |
| 218 | #ifndef CONFIG_PREEMPT | ||
| 219 | WARN_ON(1); | ||
| 220 | return 0; | ||
| 221 | #else | ||
| 222 | if (rcu_preempt_depth() != 0 || preempt_count() != 0 || | 223 | if (rcu_preempt_depth() != 0 || preempt_count() != 0 || |
| 223 | irqs_disabled()) { | 224 | irqs_disabled()) { |
| 224 | WARN_ON(1); | 225 | WARN_ON(1); |
| @@ -229,7 +230,6 @@ static int rcuhead_fixup_free(void *addr, enum debug_obj_state state) | |||
| 229 | rcu_barrier_bh(); | 230 | rcu_barrier_bh(); |
| 230 | debug_object_free(head, &rcuhead_debug_descr); | 231 | debug_object_free(head, &rcuhead_debug_descr); |
| 231 | return 1; | 232 | return 1; |
| 232 | #endif | ||
| 233 | default: | 233 | default: |
| 234 | return 0; | 234 | return 0; |
| 235 | } | 235 | } |
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h index 015abaea962..3cb8e362e88 100644 --- a/kernel/rcutiny_plugin.h +++ b/kernel/rcutiny_plugin.h | |||
| @@ -852,7 +852,7 @@ void exit_rcu(void) | |||
| 852 | if (t->rcu_read_lock_nesting == 0) | 852 | if (t->rcu_read_lock_nesting == 0) |
| 853 | return; | 853 | return; |
| 854 | t->rcu_read_lock_nesting = 1; | 854 | t->rcu_read_lock_nesting = 1; |
| 855 | rcu_read_unlock(); | 855 | __rcu_read_unlock(); |
| 856 | } | 856 | } |
| 857 | 857 | ||
| 858 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ | 858 | #else /* #ifdef CONFIG_TINY_PREEMPT_RCU */ |
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c index 89613f97ff2..c224da41890 100644 --- a/kernel/rcutorture.c +++ b/kernel/rcutorture.c | |||
| @@ -47,7 +47,6 @@ | |||
| 47 | #include <linux/srcu.h> | 47 | #include <linux/srcu.h> |
| 48 | #include <linux/slab.h> | 48 | #include <linux/slab.h> |
| 49 | #include <asm/byteorder.h> | 49 | #include <asm/byteorder.h> |
| 50 | #include <linux/sched.h> | ||
| 51 | 50 | ||
| 52 | MODULE_LICENSE("GPL"); | 51 | MODULE_LICENSE("GPL"); |
| 53 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " | 52 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com> and " |
